diff -cb d:/devtools/tcl.gen/tcllib0.8/textutil/tabify.tcl ./tabify.tcl *** d:/devtools/tcl.gen/tcllib0.8/textutil/tabify.tcl Thu Nov 2 14:19:04 2000 --- ./tabify.tcl Sun Jul 1 01:11:28 2001 *************** *** 1,3 **** --- 1,58 ---- + # + # As the author of the procs 'tabify2' and 'untabify2' I suggest that the + # comments explaining their behaviour be kept in this file. + # 1) Beginners in any programming language (I am new to Tcl so I know what I + # am talking about) can profit enormously from studying 'correct' code. + # Of course comments will help a lot in this regard. + # 2) Many problems newbies face can be solved by directing them towards + # available libraries - after all, libraries have been written to solve + # recurring problems. Then they can just use them, or have a closer look + # to see and to discover how things are done the 'Tcl way'. + # 3) And if ever a proc from a library should be less than perfect, having + # comments explaining the behaviour of the code will surely help. + # + # This said, I will welcome any error reports or suggestions for improvements + # (especially on the 'doing things the Tcl way' aspect). + # + # Use of these sources is licensed under the same conditions as is Tcl. + # + # June 2001, Helmut Giese (hgiese@ratiosoft.com) + # + # ---------------------------------------------------------------------------- + # + # The original procs 'tabify' and 'untabify' each work with complete blocks + # of $num spaces ('num' holding the tab size). While this is certainly useful + # in some circumstances, it does not reflect the way an editor works: + # Counting columns from 1, assuming a tab size of 8 and entering '12345' + # followed by a tab, you expect to advance to column 9. Your editor might + # put a tab into the file or 3 spaces, depending on its configuration. 
+ # Now, on 'tabifying' you will expect to see those 3 spaces converted to a + # tab (and on the other hand expect the tab *at this position* to be + # converted to 3 spaces). + # + # This behaviour is mimicked by the new procs 'tabify2' and 'untabify2'. + # Both have one feature in common: They accept multi-line strings (a whole + # file if you want to) but in order to make life simpler for the programmer, + # they split the incoming string into individual lines and hand each line to + # a proc that does the real work. + # + # One design decision worth mentioning here: + # A single space is never converted to a tab even if its position would + # allow it. + # Single spaces occur very often, say in arithmetic expressions like + # [expr (($a + $b) * $c) < $d]. If we didn't follow the above rule we might + # need to replace one or more of them with tabs. However, if the tab size gets + # changed, this expression would be formatted quite differently - which is + # probably not a good idea. + # + # 'untabifying' on the other hand might need to replace a tab with a single + # space: If the current position requires it, what else to do? + # As a consequence those two procs are asymmetric in this respect, but I + # couldn't think of a better solution. Could you? + # + # ---------------------------------------------------------------------------- + # + namespace eval ::textutil { namespace eval tabify { *************** *** 6,23 **** variable TabLen 8 variable TabStr [ $StrRepeat " " $TabLen ] ! namespace export tabify untabify # This will be redefined later. We need it just to let # a chance for the next import subcommand to work # proc tabify { string { num 8 } } { } proc untabify { string { num 8 } } { } } ! namespace import -force tabify::tabify tabify::untabify ! namespace export tabify untabify } --- 61,90 ---- variable TabLen 8 variable TabStr [ $StrRepeat " " $TabLen ] ! namespace export tabify untabify tabify2 untabify2 # This will be redefined later.
We need it just to let # a chance for the next import subcommand to work # proc tabify { string { num 8 } } { } proc untabify { string { num 8 } } { } + proc tabify2 { string { num 8 } } { } + proc untabify2 { string { num 8 } } { } + # The proc 'untabify2' uses the following variables for efficiency. + # Since a tab can be replaced by 1 up to 'tab size' spaces, it is handy + # to have the appropriate 'space strings' available. This is the use of + # the array 'Spaces', where 'Spaces(n)' contains just 'n' spaces. + # The variable 'TabLen2' remembers the last tab size used. + + variable TabLen2 0 + variable Spaces + array set Spaces {0 ""} } ! namespace import -force tabify::tabify tabify::untabify \ ! tabify::tabify2 tabify::untabify2 ! namespace export tabify untabify tabify2 untabify2 } *************** *** 50,53 **** --- 117,271 ---- } return $TabStr + } + + + # ---------------------------------------------------------------------------- + # + # tabifyLine: Works on a single line of text, replacing 'spaces at correct + # positions' with tabs. $num is the requested tab size. + # Returns the (possibly modified) line. + # + # 'spaces at correct positions': Only spaces which 'fill the space' between + # an arbitrary position and the next tab stop can be replaced. The proc works + # backwards: + # - Set the position to start the search from ('lastPos') to 'end'. + # - Find the last occurrence of ' ' in 'line' with respect to 'lastPos'. + # - Calculate the next and the previous tab stop with respect to this ' ', + # and define the starting point for the next search. + # - The ' ' is only a candidate for replacement if + # 1) it is just one position before a tab stop *and* + # 2) there is at least one space at its left (see comment above on not + # touching an isolated space). + # Continue, if any of these conditions is not met. + # - Determine where to put the tab (that is: how many spaces to replace?) 
+ #   by stepping backwards until
+ #   -- you hit a non-space or
+ #   -- you are at the previous tab position
+ # - Do the replacement and continue.
+ #
+ # Notes:
+ # - Positions are character indices into 'line'; a tab that is already
+ #   present in 'line' counts as one character here.
+ # - 'num' is used as a divisor and therefore must be a positive integer.
+ # - The space required to the left of the candidate must itself lie at or
+ #   after the previous tab stop. This only matters for a tab size of 1,
+ #   where two spaces must not be merged into a single (one column) tab.
+ #
+ proc ::textutil::tabify::tabifyLine { line num } {
+
+     # Start at the last character. A numeric index (instead of 'end')
+     # keeps the loop condition a plain integer comparison.
+     set lastPos [expr {[string length $line] - 1}]
+     while { $lastPos > 0 } {
+         set currPos [string last " " $line $lastPos]
+         if { $currPos == -1 } {
+             # no more spaces
+             break
+         }
+
+         set nextTab [expr {($currPos + $num) - ($currPos % $num)}]
+         set prevTab [expr {$nextTab - $num}]
+
+         # prepare for next round: continue at 'previous tab stop - 1'
+         set lastPos [expr {$prevTab - 1}]
+
+         if { ($currPos + 1) != $nextTab } {
+             continue                    ;# crit. (1)
+         }
+
+         # the left neighbour has to be a space inside the same tab cell
+         set leftPos [expr {$currPos - 1}]
+         if { $leftPos < $prevTab || [string index $line $leftPos] != " " } {
+             continue                    ;# crit. (2)
+         }
+
+         # now step backwards while there are spaces
+         for {set pos [expr {$currPos - 2}]} {$pos >= $prevTab} {incr pos -1} {
+             if { [string index $line $pos] != " " } {
+                 break
+             }
+         }
+
+         # ... and replace them ('pos' is the last position NOT replaced)
+         set line [string replace $line [expr {$pos + 1}] $currPos \t]
+     }
+     return $line
+ }
+
+
+ #
+ # Helper proc for 'untabifyLine': Checks if all needed elements of array
+ # 'Spaces' exist and creates the missing ones if needed.
+ # 'TabLen2' remembers the largest tab size prepared so far, so only the
+ # elements beyond it have to be created.
+ #
+ proc ::textutil::tabify::checkArr { num } {
+     variable TabLen2
+     variable Spaces
+     variable StrRepeat
+
+     if { $num > $TabLen2 } {
+         for { set i [expr {$TabLen2 + 1}] } { $i <= $num } { incr i } {
+             # 'Spaces(i)' holds a string of exactly 'i' spaces
+             set Spaces($i) [$StrRepeat " " $i]
+         }
+         set TabLen2 $num
+     }
+ }
+
+
+ # untabifyLine: Works on a single line of text, replacing tabs with enough
+ # spaces to get to the next tab position.
+ # Returns the (possibly modified) line.
+ #
+ # The procedure is straightforward:
+ # - Find the next tab.
+ # - Calculate the next tab position following it.
+ # - Delete the tab and insert as many spaces as needed to get there.
+ #
+ # 'num' is used as a divisor and must be a positive integer. The array
+ # elements 'Spaces(1)' .. 'Spaces($num)' have to exist already; 'untabify2'
+ # takes care of that by calling 'checkArr' before handing lines to this proc.
+ #
+ proc ::textutil::tabify::untabifyLine { line num } {
+     variable Spaces
+
+     set currPos 0
+     while { 1 } {
+         set currPos [string first \t $line $currPos]
+         if { $currPos == -1 } {
+             # no more tabs
+             break
+         }
+
+         # how far is the next tab position ?
+         set dist [expr {$num - ($currPos % $num)}]
+         # replace '\t' at $currPos with $dist spaces
+         set line [string replace $line $currPos $currPos $Spaces($dist)]
+
+         # set up for next round (not absolutely necessary but maybe a trifle
+         # more efficient)
+         incr currPos $dist
+     }
+     return $line
+ }
+
+
+ # tabify2: Replace all 'appropriate' spaces as discussed above with tabs.
+ # 'string' might hold any number of lines, 'num' is the requested tab size.
+ # Returns (possibly modified) 'string'. An empty 'string' is returned as an
+ # empty string.
+ #
+ proc ::textutil::tabify::tabify2 { string { num 8 } } {
+
+     # split string into individual lines
+     set inLst [split $string \n]
+
+     # 'split' returns an empty list for an empty string, in which case the
+     # loop below never runs - so the result list must exist beforehand
+     set outLst {}
+
+     # now work on each line
+     foreach line $inLst {
+         lappend outLst [tabifyLine $line $num]
+     }
+
+     # return all as one string
+     return [join $outLst \n]
+ }
+
+
+ # untabify2: Replace all tabs with the appropriate number of spaces.
+ # 'string' might hold any number of lines, 'num' is the requested tab size.
+ # Returns (possibly modified) 'string'.
+ # An empty 'string' is returned as an empty string.
+ #
+ proc ::textutil::tabify::untabify2 { string { num 8 } } {
+
+     # assure array 'Spaces' is set up 'comme il faut'
+     checkArr $num
+
+     set inLst [split $string \n]
+
+     # 'split' returns an empty list for an empty string, in which case the
+     # loop below never runs - so the result list must exist beforehand
+     set outLst {}
+
+     foreach line $inLst {
+         lappend outLst [untabifyLine $line $num]
+     }
+
+     return [join $outLst \n]
  }
diff -cb d:/devtools/tcl.gen/tcllib0.8/textutil/tabify.test ./tabify.test *** d:/devtools/tcl.gen/tcllib0.8/textutil/tabify.test Thu Nov 2 14:19:04 2000 --- ./tabify.test Sun Jul 1 00:50:20 2001 *************** *** 40,42 **** --- 40,124 ---- ::textutil::untabify "\t hello,\t world\t " 5 } " hello, world " + # + # Tests for version 2 of (un)tabify + # + + # + # tests 2.1 - 2.3: see how a single space (after 'hello') is not converted + # to a tab + # + test tabify-2.1 {version 2: tabify, tab size 3} { + ::textutil::tabify2 "hello world" 3 + } "hello \tworld" + + test tabify-2.2 {version 2: tabify, tab size 3, more spaces than needed} { + ::textutil::tabify2 "hello world" 3 + } "hello \t world" + + test tabify-2.3 {version 2: tabify, tab size 3, less spaces than needed} { + ::textutil::tabify2 "hello world" 3 + } "hello world" + + test tabify-2.4 {version 2: tabify, tab size 8} { + ::textutil::tabify2 "hello world" + } "hello\tworld" + + test tabify-2.5 {version 2: tabify, tab size 8, more spaces than needed} { + ::textutil::tabify2 "hello world" + } "hello\t world" + + test tabify-2.6 {version 2: tabify, tab size 8, less spaces than needed} { + ::textutil::tabify2 "hello world" + } "hello world" + + # + # tests 2.7 & 2.8: 'end of line' (\n or not) of last line is preserved + # + test tabify-2.7 {version 2: tabify, tab size 8, multi line} { + ::textutil::tabify2 "line 1 \n line 2\nline 3 \n line 4" + } "line 1\t\n\tline 2\nline 3\t\n\tline 4" + + test tabify-2.8 {version 2: tabify, tab size 8, multi line} { + ::textutil::tabify2 "line 1 \n line 2\nline 3 \n line 4\n" + } "line 1\t\n\tline 2\nline 3\t\n\tline 4\n" + + + # + # untabify + # + test tabify-3.1 {version 2: untabify, tab size 3} { +
::textutil::untabify2 "hello \tworld" 3 + } "hello world" + + test tabify-3.2 {version 2: untabify, tab size 3, tab to single space} { + ::textutil::untabify2 "hello\t\tworld" 3 + } "hello world" + + # + # The change in tab size from 3 to 8 (silently) results in building the + # appropriate 'Spaces' strings (in 3.5 'Spaces(6)' is needed) + # + test tabify-3.3 {version 2: untabify, tab size 8} { + ::textutil::untabify2 "hello\tworld" + } "hello world" + + test tabify-3.4 {version 2: untabify, tab size 8, mix of tab and spaces} { + ::textutil::untabify2 "hello \tworld" + } "hello world" + + test tabify-3.5 {version 2: untabify, tab size 8, requires 'long' space string} { + ::textutil::untabify2 "hello\tmy\tworld" + } "hello my world" + + + # + # tests 3.6 & 3.7: 'end of line' (\n or not) of last line is preserved + # + test tabify-3.6 {version 2: untabify, tab size 8, multi line} { + ::textutil::untabify2 "line 1\t\n\tline 2\nline 3\t\n\tline 4" + } "line 1 \n line 2\nline 3 \n line 4" + + test tabify-3.7 {version 2: untabify, tab size 8, multi line} { + ::textutil::untabify2 "line 1\t\n\tline 2\nline 3\t\n\tline 4\n" + } "line 1 \n line 2\nline 3 \n line 4\n"