Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Merge 8.7 |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
78f530026be0e2ab929f4c69fba03edd |
User & Date: | dgp 2018-04-24 13:51:12.974 |
Context
2018-04-24
| ||
23:00 | Merge fork check-in: 0f76722982 user: andy tags: trunk | |
13:51 | Merge 8.7 check-in: 78f530026b user: dgp tags: trunk | |
13:45 | Merge 8.6 (bug-fix and test-case for Tcl_UtfAtIndex with TCL_UTF_MAX=4) ((Replacement checkin for ea... check-in: a4b3649641 user: dgp tags: core-8-branch | |
2018-04-23
| ||
15:07 | merge 8.7 check-in: aaa3ab6a72 user: jan.nijtmans tags: trunk | |
Changes
Changes to generic/tclUtf.c.
︙ | ︙ | |||
210 211 212 213 214 215 216 | * to this previously initialized DString. */ { const Tcl_UniChar *w, *wEnd; char *p, *string; int oldLength; /* | | < | | 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | * to this previously initialized DString. */ { const Tcl_UniChar *w, *wEnd; char *p, *string; int oldLength; /* * UTF-8 string length in bytes will be <= Unicode string length * 4. */ oldLength = Tcl_DStringLength(dsPtr); Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * 4); string = Tcl_DStringValue(dsPtr) + oldLength; p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { p += Tcl_UniCharToUtf(*w, p); w++; |
︙ | ︙ | |||
751 752 753 754 755 756 757 | /* *--------------------------------------------------------------------------- * * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in | | > > > > > > > > > > | 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 | /* *--------------------------------------------------------------------------- * * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in * the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as * 2 positions, but then the pointer should never be placed between * the two positions. * * Results: * As above. * * Side effects: * None. * *--------------------------------------------------------------------------- */ const char * Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; int len = 1; while (index-- > 0) { len = TclUtfToUniChar(src, &ch); src += len; } #if TCL_UTF_MAX <= 4 if (!len) { /* Index points at character following High Surrogate */ src += TclUtfToUniChar(src, &ch); } #endif return src; } /* *--------------------------------------------------------------------------- * * Tcl_UtfBackslash -- |
︙ | ︙ | |||
863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 | * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); upChar = ch; if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } upChar = Tcl_UniCharToUpper(upChar); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case * char to dst if its size is <= the original char. */ | > > | 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 | * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); upChar = ch; #if TCL_UTF_MAX <= 4 if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif upChar = Tcl_UniCharToUpper(upChar); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case * char to dst if its size is <= the original char. */ |
︙ | ︙ | |||
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 | * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = ch; if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } lowChar = Tcl_UniCharToLower(lowChar); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ | > > | 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 | * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = ch; #if TCL_UTF_MAX <= 4 if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif lowChar = Tcl_UniCharToLower(lowChar); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ |
︙ | ︙ | |||
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 | */ src = dst = str; if (*src) { bytes = TclUtfToUniChar(src, &ch); titleChar = ch; if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } titleChar = Tcl_UniCharToTitle(titleChar); if (bytes < TclUtfCount(titleChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += bytes; } while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = ch; if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } lowChar = Tcl_UniCharToLower(lowChar); if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(lowChar, dst); | > > > > | 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 | */ src = dst = str; if (*src) { bytes = TclUtfToUniChar(src, &ch); titleChar = ch; #if TCL_UTF_MAX <= 4 if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif titleChar = Tcl_UniCharToTitle(titleChar); if (bytes < TclUtfCount(titleChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += bytes; } while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = ch; #if TCL_UTF_MAX <= 4 if (!bytes) { /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */ bytes = TclUtfToUniChar(src, &ch); /* Combine surrogates */ lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif lowChar = Tcl_UniCharToLower(lowChar); if (bytes < TclUtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(lowChar, dst); |
︙ | ︙ |
Changes to tests/string.test.
︙ | ︙ | |||
27 28 29 30 31 32 33 | proc makeShared {s} {uplevel 1 [list lappend copy $s]; return $s} # Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] ne {}}] testConstraint testindexobj [expr {[info commands testindexobj] ne {}}] testConstraint testevalex [expr {[info commands testevalex] ne {}}] | | | 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | proc makeShared {s} {uplevel 1 [list lappend copy $s]; return $s} # Some tests require the testobj command testConstraint testobj [expr {[info commands testobj] ne {}}] testConstraint testindexobj [expr {[info commands testindexobj] ne {}}] testConstraint testevalex [expr {[info commands testevalex] ne {}}] testConstraint tip389 [expr {[string length \U010000] == 2}] # Used for constraining memory leak tests testConstraint memory [llength [info commands memory]] if {[testConstraint memory]} { proc getbytes {} { set lines [split [memory info] \n] return [lindex $lines 3 3] |
︙ | ︙ | |||
483 484 485 486 487 488 489 | } -match glob -result {1 {*}} test string-5.19.$noComp {string index, bytearray object out of bounds} { run {string index [binary format I* {0x50515253 0x52}] -1} } {} test string-5.20.$noComp {string index, bytearray object out of bounds} { run {string index [binary format I* {0x50515253 0x52}] 20} } {} | | | | 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 | } -match glob -result {1 {*}} test string-5.19.$noComp {string index, bytearray object out of bounds} { run {string index [binary format I* {0x50515253 0x52}] -1} } {} test string-5.20.$noComp {string index, bytearray object out of bounds} { run {string index [binary format I* {0x50515253 0x52}] 20} } {} test string-5.21.$noComp {string index, surrogates, bug [11ae2be95dac9417]} tip389 { run {list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3]} } [list \U100000 {} b] proc largest_int {} { # This will give us what the largest valid int on this machine is, # so we can test for overflow properly below on >32 bit systems set int 1 |
︙ | ︙ | |||
1480 1481 1482 1483 1484 1485 1486 | list $input_hex $rxBuffer_hex $rxCRC_hex } {000341 000341 0341} test string-12.22.$noComp {string range, shimmering binary/index} { set s 0000000001 binary scan $s a* x run {string range $s $s end} } 000000001 | | | 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 | list $input_hex $rxBuffer_hex $rxCRC_hex } {000341 000341 0341} test string-12.22.$noComp {string range, shimmering binary/index} { set s 0000000001 binary scan $s a* x run {string range $s $s end} } 000000001 test string-12.23.$noComp {string range, surrogates, bug [11ae2be95dac9417]} tip389 { run {list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]} } [list \U100000 {} b] test string-13.1.$noComp {string repeat} { list [catch {run {string repeat}} msg] $msg } {1 {wrong # args: should be "string repeat string count"}} test string-13.2.$noComp {string repeat} { |
︙ | ︙ | |||
1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 | } "Abcabc\xe7\xe7" test string-17.7.$noComp {string totitle, unicode} { run {string totitle \u01f3BCabc\xc7\xe7} } "\u01f2bcabc\xe7\xe7" test string-17.8.$noComp {string totitle, compiled} { lindex [run {string totitle [list aa bb [list cc]]}] 0 } Aa test string-18.1.$noComp {string trim} { list [catch {run {string trim}} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} test string-18.2.$noComp {string trim} { list [catch {run {string trim a b c}} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} | > > > > | 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 | } "Abcabc\xe7\xe7" test string-17.7.$noComp {string totitle, unicode} { run {string totitle \u01f3BCabc\xc7\xe7} } "\u01f2bcabc\xe7\xe7" test string-17.8.$noComp {string totitle, compiled} { lindex [run {string totitle [list aa bb [list cc]]}] 0 } Aa test string-17.9.$noComp {string totitle, surrogates, bug [11ae2be95dac9417]} tip389 { run {list [string totitle a\U118c0c 1 1] [string totitle a\U118c0c 2 2] \ [string totitle a\U118c0c 3 3]} } [list a\U118a0c a\U118c0C a\U118c0C] test string-18.1.$noComp {string trim} { list [catch {run {string trim}} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} test string-18.2.$noComp {string trim} { list [catch {run {string trim a b c}} msg] $msg } {1 {wrong # args: should be "string trim string ?chars?"}} |
︙ | ︙ |