Tcl Source Code

Check-in [3d8301e3c6]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Bug-fix in Tcl_UtfAtIndex (for TCL_UTF_MAX=4 only). With test-case (in "string totitle") demonstrating the bug.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-6-branch
Files: files | file ages | folders
SHA3-256: 3d8301e3c681db466d87afdd47755bbccaa58d6d409f0b1353bfa1bd927f2328
User & Date: jan.nijtmans 2018-04-23 23:23:00
Context
2018-04-25
11:48
Doc typo fix from Andy Goth. check-in: bbad47db82 user: dgp tags: core-8-6-branch
2018-04-24
13:45
Merge 8.6 (bug-fix and test-case for Tcl_UtfAtIndex with TCL_UTF_MAX=4) ((Replacement checkin for ea... check-in: a4b3649641 user: dgp tags: core-8-branch
2018-04-23
23:32
Merge 8.6 (bug-fix and test-case for Tcl_UtfAtIndex with TCL_UTF_MAX=4) check-in: 567e61b329 user: jan.nijtmans tags: mistake
23:23
Bug-fix in Tcl_UtfAtIndex (for TCL_UTF_MAX=4 only). With test-case (in "string totitle") demonstrati... check-in: 3d8301e3c6 user: jan.nijtmans tags: core-8-6-branch
14:56
Add some state to encodings, so we can do better surrogate handling for TCL_UTF_MAX >= 4. Backported... check-in: c41cbc5340 user: jan.nijtmans tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclUtf.c.

758
759
760
761
762
763
764

765
766






767
768

769
770
771
772
773
774
775
const char *
Tcl_UtfAtIndex(
    register const char *src,	/* The UTF-8 string. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;


    while (index-- > 0) {






	src += TclUtfToUniChar(src, &ch);
    }

    return src;
}
 
/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfBackslash --






>


>
>
>
>
>
>


>







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
const char *
Tcl_UtfAtIndex(
    register const char *src,	/* The UTF-8 string. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;
    int len = 1;

    while (index-- > 0) {
	len = TclUtfToUniChar(src, &ch);
	src += len;
    }
#if TCL_UTF_MAX == 4
     if (!len) {
	/* Index points at character following High Surrogate */
	src += TclUtfToUniChar(src, &ch);
    }
#endif
    return src;
}
 
/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfBackslash --

Changes to tests/string.test.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
...
287
288
289
290
291
292
293



294
295
296
297
298
299
300
....
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
....
1473
1474
1475
1476
1477
1478
1479




1480
1481
1482
1483
1484
1485
1486
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]

# Some tests require the testobj command

testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]

# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]

test string-1.1 {error conditions} {
    list [catch {string gorp a b} msg] $msg
} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
................................................................................
} -match glob -result {1 {*invalid octal number*}}
test string-5.19 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] -1
} {}
test string-5.20 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] 20
} {}





proc largest_int {} {
    # This will give us what the largest valid int on this machine is,
    # so we can test for overflow properly below on >32 bit systems
    set int 1
    set exp 7; # assume we get at least 8 bits
................................................................................
    list $input_hex $rxBuffer_hex $rxCRC_hex
} {000341 000341 0341}
test string-12.22 {string range, shimmering binary/index} {
    set s 0000000001
    binary scan $s a* x
    string range $s $s end
} 000000001
test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf {
    list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
} [list \U100000 {} b]

test string-13.1 {string repeat} {
    list [catch {string repeat} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}
test string-13.2 {string repeat} {
................................................................................
} "Abcabc\xe7\xe7"
test string-17.7 {string totitle, unicode} {
    string totitle \u01f3BCabc\xc7\xe7
} "\u01f2bcabc\xe7\xe7"
test string-17.8 {string totitle, compiled} {
    lindex [string totitle [list aa bb [list cc]]] 0
} Aa





test string-18.1 {string trim} {
    list [catch {string trim} msg] $msg
} {1 {wrong # args: should be "string trim string ?chars?"}}
test string-18.2 {string trim} {
    list [catch {string trim a b c} msg] $msg
} {1 {wrong # args: should be "string trim string ?chars?"}}






|







 







>
>
>







 







|







 







>
>
>
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
...
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
....
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
....
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]

# Some tests require the testobj command

testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
testConstraint tip389 [expr {[string length \U010000] == 2}]

# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]

test string-1.1 {error conditions} {
    list [catch {string gorp a b} msg] $msg
} {1 {unknown or ambiguous subcommand "gorp": must be bytelength, cat, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
................................................................................
} -match glob -result {1 {*invalid octal number*}}
test string-5.19 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] -1
} {}
test string-5.20 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] 20
} {}
test string-5.21 {string index, surrogates, bug [11ae2be95dac9417]} tip389 {
    list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3]
} [list \U100000 {} b]


proc largest_int {} {
    # This will give us what the largest valid int on this machine is,
    # so we can test for overflow properly below on >32 bit systems
    set int 1
    set exp 7; # assume we get at least 8 bits
................................................................................
    list $input_hex $rxBuffer_hex $rxCRC_hex
} {000341 000341 0341}
test string-12.22 {string range, shimmering binary/index} {
    set s 0000000001
    binary scan $s a* x
    string range $s $s end
} 000000001
test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} tip389 {
    list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
} [list \U100000 {} b]

test string-13.1 {string repeat} {
    list [catch {string repeat} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}
test string-13.2 {string repeat} {
................................................................................
} "Abcabc\xe7\xe7"
test string-17.7 {string totitle, unicode} {
    string totitle \u01f3BCabc\xc7\xe7
} "\u01f2bcabc\xe7\xe7"
test string-17.8 {string totitle, compiled} {
    lindex [string totitle [list aa bb [list cc]]] 0
} Aa
test string-17.9 {string totitle, surrogates, bug [11ae2be95dac9417]} tip389 {
    list [string totitle a\U118c0c 1 1] [string totitle a\U118c0c 2 2] \
	[string totitle a\U118c0c 3 3]
} [list a\U118a0c a\U118c0C a\U118c0C]

test string-18.1 {string trim} {
    list [catch {string trim} msg] $msg
} {1 {wrong # args: should be "string trim string ?chars?"}}
test string-18.2 {string trim} {
    list [catch {string trim a b c} msg] $msg
} {1 {wrong # args: should be "string trim string ?chars?"}}