Attachment "testword.tcl" to
ticket [2890743fff]
added by
andreas_kupries
2009-11-03 05:04:24.
proc Min {a b} {
if {$a < $b} {
return $a
} else {
return $b
}
}
proc At {dv what start max} {
upvar 1 $dv data
set idx [string first $what $data $start]
if {$idx < 0} { set idx $max }
return $idx
}
proc Word0StrFirst {dv} {
upvar 1 $dv data
if {[regexp -indices "^\\s*(\{)" $data -> bracerange]} {
# The word is brace-quoted, starting at index 'lindex
# bracerange 0'. We now have to find the closing brace,
# counting inner braces, ignoring quoted braces. We fail if
# there is no proper closing brace.
foreach {s e} $bracerange break
incr s ; # index of the first char after the brace.
incr e ; # same. but this is our running index.
set level 1
set max [string length $data]
while {$level} {
set open [At data "\{" $e $max]
set qopen [At data "\\\{" $e $max]
set qclose [At data "\\\}" $e $max]
set close [At data "\}" $e $max]
set nearest [Min [Min $open $qopen] [Min $close $qclose]]
#puts $open/$qopen/$close/$qclose//$nearest//$max
if {$nearest == $max} {
# None of the possibilities were found.
# The word has to be incomplete.
return -code error "no complete word found/1"
}
if {($qopen == $nearest) || ($qclose == $nearest)} {
set e $nearest
incr e 2 ; # First character after the quoted brace
continue ; # We are ignoring everything before.
} elseif {$open == $nearest} {
# One more bracing level to look for.
incr level
set e $nearest
incr e
continue
} elseif {$close == $nearest} {
# Get one bracing level down. We may have found the
# end of the word.
incr level -1
set e $nearest
incr e
continue
} else {
return -code error "internal error"
}
}
incr e -2 ; # index of character just before the brace.
return [list $s $e 2]
} elseif {[regexp -indices {^\s*(\S+)\s} $data -> wordrange]} {
# The word is a simple literal which ends at the next
# whitespace character. Note that there has to be a whitespace
# for us to recognize a word, for while there is no whitespace
# behind it in the buffer the word itself may be incomplete.
return [linsert $wordrange end 1]
}
return -code error "no complete word found/2"
}
proc Word0Regexp {dv} {
upvar 1 $dv data
if {[regexp -indices "^\\s*(\{)" $data -> bracerange]} {
# The word is brace-quoted, starting at index 'lindex
# bracerange 0'. We now have to find the closing brace,
# counting inner braces, ignoring quoted braces. We fail if
# there is no proper closing brace.
#puts r/bracerange=$bracerange
foreach {s e} $bracerange break
incr s ; # index of the first char after the brace.
incr e ; # same. but this is our running index.
set level 1
set max [string length $data]
while {$level} {
# We are looking for the first regular or backslash-quoted
# opening or closing brace in the string. If none is found
# then the word is not complete, and we abort our search.
#puts X$e'[string range $data $e end]'
#puts X$e|[regexp -indices -start $e {(([{}])|(\\[{}]))} $data -> any regular quoted]
if {![regexp -indices -start $e {(([{}])|(\\[{}]))} $data -> any regular quoted]} {
# ^^ ^
# |regular \quoted
# any
return -code error "no complete word found/1"
}
#puts "| ${->} | $any | r=$regular | q=$quoted |"
foreach {qs qe} $quoted break
foreach {rs re} $regular break
if {$qs >= 0} {
# Skip quoted braces ...
set e $qe
incr e
continue
} elseif {$rs >= 0} {
# Step one nesting level in or out.
if {[string index $data $rs] eq "\{"} {
incr level
} else {
incr level -1
}
set e $re
incr e
#puts @$e
continue
} else {
return -code error "internal error"
}
}
#puts "<$s $e>"
incr e -2 ; # index of character just before the brace.
return [list $s $e 2]
} elseif {[regexp -indices {^\s*(\S+)\s} $data -> wordrange]} {
# The word is a simple literal which ends at the next
# whitespace character. Note that there has to be a whitespace
# for us to recognize a word, for while there is no whitespace
# behind it in the buffer the word itself may be incomplete.
return [linsert $wordrange end 1]
}
return -code error "no complete word found/2"
}
proc cap {x} {
if {[string match *complete* $x]} return
foreach {s e step} $x break
#if {$e < $s } { set x $e ; set e $s ; set s $x }
set step [expr {$e + $step}]
set e [expr {$e - $s - 1}]
if {$e < 0} {
puts --------.--'[string repeat - $s]^
} else {
puts --------.--'[string repeat - $s]^[string repeat - $e]^
}
puts --------.--'[string repeat - $step]|
return
}
set ta 0
set tb 0
set n 0
proc test {data} {
global ta tb n
set timea [lindex [time {catch {Word0StrFirst data} msga}] 0]
set timeb [lindex [time {catch {Word0Regexp data} msgb}] 0]
incr ta $timea
incr tb $timeb
incr n
puts "[format %8d $timea] A '[string range $data 0 30]' = ($msga)"
puts "[format %8d $timeb] B '[string range $data 0 30]' = ($msgb)"
cap $msga
cap $msgb
puts ""
}
test { word }
test { word}
test {word }
test { {}}
test {{}}
test { {} }
test {{xx yy}}
test {{xx {} yy}}
test {{xx {}}}
test {{{} yy}}
test "\{xx \\\{\}"
test " {} x"
test "{xx yy} z"
test " \{xx "
test " {xx {} yy } "
puts ============================================================
foreach n {
1 10 100 1000 10000 100000 1000000
} {
test "word[string repeat { } $n]"
test "word[string repeat { } $n]\{"
test "\{xx yy\}[string repeat { } $n]"
test "\{xx yy\}[string repeat { } $n]\{"
}
puts ============================================================
puts "A $ta ([expr {$ta/double($n)}])"
puts "B $tb ([expr {$tb/double($n)}])"