Tk Library Source Code

Artifact [5a7d3c5e5d]
Login

Artifact 5a7d3c5e5d960db55b0a4f0536970f9d9e3d1bc2:

Attachment "testword.tcl" to ticket [2890743fff] added by andreas_kupries 2009-11-03 05:04:24.

proc Min {a b} {
    if {$a < $b} {
	return $a
    } else { 
	return $b
    }
}

proc At {dv what start max} {
    upvar 1 $dv data
    set idx [string first $what $data $start]
    if {$idx < 0} { set idx $max }
    return $idx
}


proc Word0StrFirst {dv} {
    upvar 1 $dv data

    if {[regexp -indices "^\\s*(\{)" $data -> bracerange]} {

	# The word is brace-quoted, starting at index 'lindex
	# bracerange 0'. We now have to find the closing brace,
	# counting inner braces, ignoring quoted braces. We fail if
	# there is no proper closing brace.

	foreach {s e} $bracerange break
	incr s ; # index of the first char after the brace.
	incr e ; # same. but this is our running index.

	set level 1
	set max [string length $data]

	while {$level} {
	    set open   [At data "\{"   $e $max]
	    set qopen  [At data "\\\{" $e $max]
	    set qclose [At data "\\\}" $e $max]
	    set close  [At data "\}"   $e $max]
 
	    set nearest [Min [Min $open $qopen] [Min $close $qclose]]

	    #puts $open/$qopen/$close/$qclose//$nearest//$max

	    if {$nearest == $max} {
		# None of the possibilities were found.
		# The word has to be incomplete.
		return -code error "no complete word found/1"
	    }

	    if {($qopen == $nearest) || ($qclose == $nearest)} {
		set  e $nearest
		incr e 2 ; # First character after the quoted brace
		continue ; # We are ignoring everything before.
	    } elseif {$open == $nearest} {
		# One more bracing level to look for.
		incr level
		set e $nearest
		incr e
		continue
	    } elseif {$close == $nearest} {
		# Get one bracing level down. We may have found the
		# end of the word.
		incr level -1
		set  e $nearest
		incr e
		continue
	    } else {
		return -code error "internal error"
	    }
	}

	incr e -2 ; # index of character just before the brace.
	return [list $s $e 2]

    } elseif {[regexp -indices {^\s*(\S+)\s} $data -> wordrange]} {

	# The word is a simple literal which ends at the next
	# whitespace character. Note that there has to be a whitespace
	# for us to recognize a word, for while there is no whitespace
	# behind it in the buffer the word itself may be incomplete.

	return [linsert $wordrange end 1]
    }

    return -code error "no complete word found/2"
}

proc Word0Regexp {dv} {
    upvar 1 $dv data

    if {[regexp -indices "^\\s*(\{)" $data -> bracerange]} {

	# The word is brace-quoted, starting at index 'lindex
	# bracerange 0'. We now have to find the closing brace,
	# counting inner braces, ignoring quoted braces. We fail if
	# there is no proper closing brace.

	#puts r/bracerange=$bracerange

	foreach {s e} $bracerange break
	incr s ; # index of the first char after the brace.
	incr e ; # same. but this is our running index.

	set level 1
	set max [string length $data]

	while {$level} {
	    # We are looking for the first regular or backslash-quoted
	    # opening or closing brace in the string. If none is found
	    # then the word is not complete, and we abort our search.

	    #puts X$e'[string range $data $e end]'
	    #puts X$e|[regexp -indices -start $e {(([{}])|(\\[{}]))} $data -> any regular quoted]
	    if {![regexp -indices -start $e {(([{}])|(\\[{}]))} $data -> any regular quoted]} {
		#                            ^^      ^
		#                            |regular \quoted
		#                            any
		return -code error "no complete word found/1"
	    }

	    #puts "| ${->} | $any | r=$regular | q=$quoted |"

	    foreach {qs qe} $quoted break
	    foreach {rs re} $regular break

	    if {$qs >= 0} {
		# Skip quoted braces ...
		set e $qe
		incr e
		continue
	    } elseif {$rs >= 0} {
		# Step one nesting level in or out.
		if {[string index $data $rs] eq "\{"} {
		    incr level
		} else {
		    incr level -1
		}
		set  e $re
		incr e
		#puts @$e
		continue
	    } else {
		return -code error "internal error"
	    }
	}

	#puts "<$s $e>"

	incr e -2 ; # index of character just before the brace.
	return [list $s $e 2]

    } elseif {[regexp -indices {^\s*(\S+)\s} $data -> wordrange]} {

	# The word is a simple literal which ends at the next
	# whitespace character. Note that there has to be a whitespace
	# for us to recognize a word, for while there is no whitespace
	# behind it in the buffer the word itself may be incomplete.

	return [linsert $wordrange end 1]
    }

    return -code error "no complete word found/2"
}

proc cap {x} {
    if {[string match *complete* $x]} return
    foreach {s e step} $x break
    #if {$e < $s } { set x $e ; set e $s ; set s $x }

    set step [expr {$e + $step}]

    set e [expr {$e - $s - 1}]
    if {$e < 0} {
	puts --------.--'[string repeat - $s]^
    } else {
	puts --------.--'[string repeat - $s]^[string repeat - $e]^
    }

    puts --------.--'[string repeat - $step]|

    return
}


set ta 0
set tb 0
set n 0

proc test {data} {
    global ta tb n

    set timea [lindex [time {catch {Word0StrFirst data} msga}] 0]
    set timeb [lindex [time {catch {Word0Regexp   data} msgb}] 0]

    incr ta $timea
    incr tb $timeb
    incr n

    puts "[format %8d $timea] A '[string range $data 0 30]' = ($msga)"
    puts "[format %8d $timeb] B '[string range $data 0 30]' = ($msgb)"
    cap $msga
    cap $msgb
    puts ""
}


test {   word  }
test { word}
test {word }

test { {}}
test {{}}
test { {} }

test {{xx yy}}
test {{xx {} yy}}
test {{xx {}}}
test {{{} yy}}
test "\{xx \\\{\}"

test    "  {} x"
test    "{xx yy} z"
test    " \{xx "
test    " {xx {} yy } "

puts ============================================================

foreach n {
    1 10 100 1000 10000 100000 1000000
} {
    test "word[string repeat { } $n]"
    test "word[string repeat { } $n]\{"

    test "\{xx yy\}[string repeat { } $n]"
    test "\{xx yy\}[string repeat { } $n]\{"
}

puts ============================================================




puts "A $ta ([expr {$ta/double($n)}])"
puts "B $tb ([expr {$tb/double($n)}])"