Index: generic/tclParse.c ================================================================== --- generic/tclParse.c +++ generic/tclParse.c @@ -866,20 +866,31 @@ if (count == 2) { /* * No hexdigits -> This is just "u". */ result = 'u'; - } else if (((result & 0xFC00) == 0xD800) && (count == 6) +#if TCL_UTF_MAX > 3 + } else if ((result & 0xF800) == 0xD800) { + result = 0xFFFD; +#else + } else if ((result & 0xFC00) == 0xDC00) { + result = 0xFFFD; + } else if ((result & 0xFC00) == 0xD800) { + if ((count == 6) && (p[5] == '\\') && (p[6] == 'u') && (numBytes >= 10)) { - /* If high surrogate is immediately followed by a low surrogate - * escape, combine them into one character. */ - int low; - int count2 = ParseHex(p+7, 4, &low); - if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { - result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; - count += count2 + 2; - } + /* If high surrogate is immediately followed by a low surrogate + * escape, combine them into one character. */ + int low; + int count2 = ParseHex(p+7, 4, &low); + if ((count2 == 4) && ((low & 0xFC00) == 0xDC00)) { + result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; + count += count2 + 2; + break; + } + } + result = 0xFFFD; +#endif } break; case 'U': count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { Index: generic/tclUtf.c ================================================================== --- generic/tclUtf.c +++ generic/tclUtf.c @@ -66,11 +66,11 @@ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 4,4,4,4,4, #else 1,1,1,1,1, @@ -85,25 +85,19 @@ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, #if TCL_UTF_MAX > 3 4,4,4,4,4, #else 3,3,3,3,3, #endif 1,1,1,1,1,1,1,1,1,1,1 }; - -/* - * Functions used only in this module. - */ - -static int Invalid(const char *src); /* *--------------------------------------------------------------------------- * * TclUtfCount -- @@ -132,66 +126,10 @@ if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) { return 4; } return 3; } - -/* - *--------------------------------------------------------------------------- - * - * Invalid -- - * - * Given a pointer to a two-byte prefix of a well-formed UTF-8 byte - * sequence (a lead byte followed by a trail byte) this routine - * examines those two bytes to determine whether the sequence is - * invalid in UTF-8. This might be because it is an overlong - * encoding, or because it encodes something out of the proper range. - * - * Given a pointer to the bytes \xF8 or \xFC , this routine will - * try to read beyond the end of the "bounds" table. Callers must - * prevent this. - * - * Given a pointer to something else (an ASCII byte, a trail byte, - * or another byte that can never begin a valid byte sequence such - * as \xF5) this routine returns false. That makes the routine poorly - * named, as it does not detect and report all invalid sequences. - * - * Callers have to take care that this routine does something useful - * for their needs. - * - * Results: - * A boolean. - *--------------------------------------------------------------------------- - */ - -static const unsigned char bounds[28] = { - 0x80, 0x80, /* \xC0 accepts \x80 only */ - 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, - 0x80, 0xBF, /* (\xC4 - \xDC) -- all sequences valid */ - 0xA0, 0xBF, /* \xE0\x80 through \xE0\x9F are invalid prefixes */ - 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, /* (\xE4 - \xEC) -- all valid */ - 0x90, 0xBF, /* \xF0\x80 through \xF0\x8F are invalid prefixes */ - 0x80, 0x8F /* \xF4\x90 and higher are invalid prefixes */ -}; - -static int -Invalid( - const char *src) /* Points to lead byte of a UTF-8 byte sequence */ -{ - unsigned char byte = UCHAR(*src); - int index; - - if ((byte & 0xC3) == 0xC0) { - /* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */ - index = (byte - 0xC0) >> 1; - if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) { - /* Out of bounds - report invalid. */ - return 1; - } - } - return 0; -} /* *--------------------------------------------------------------------------- * * Tcl_UniCharToUtf -- @@ -499,10 +437,13 @@ */ *chPtr = (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); if (*chPtr > 0x7FF) { + if ((*chPtr & 0xF800) == 0xD800) { + *chPtr = 0xFFFD; + } return 3; } } /* @@ -595,10 +536,13 @@ */ *chPtr = (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); if (*chPtr > 0x7FF) { + if ((*chPtr & 0xF800) == 0xD800) { + *chPtr = 0xFFFD; + } return 3; } } /* @@ -964,22 +908,20 @@ const char * Tcl_UtfNext( const char *src) /* The current location in the string. */ { - int left; - const char *next; + int left = totalBytes[UCHAR(*src)]; + const char *next = src + 1; if (((*src) & 0xC0) == 0x80) { if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) { ++src; } return src; } - left = totalBytes[UCHAR(*src)]; - next = src + 1; while (--left) { if ((*next & 0xC0) != 0x80) { /* * src points to non-trail byte; We ran out of trail bytes * before the needs of the lead byte were satisfied. @@ -987,20 +929,10 @@ */ return src + 1; } next++; } - /* - * Call Invalid() here only if required conditions are met: - * src[0] is known a lead byte. - * src[1] is known a trail byte. - * Especially important to prevent calls when src[0] == '\xF8' or '\xFC' - * See tests utf-6.37 through utf-6.43 through valgrind or similar tool. - */ - if ((next == src + 1) || Invalid(src)) { - return src + 1; - } return next; } /* *--------------------------------------------------------------------------- @@ -1023,103 +955,36 @@ *--------------------------------------------------------------------------- */ const char * Tcl_UtfPrev( - const char *src, /* A location in a UTF-8 string. */ - const char *start) /* Pointer to the beginning of the string */ -{ - int trailBytesSeen = 0; /* How many trail bytes have been verified? */ - const char *fallback = src - 1; - /* If we cannot find a lead byte that might - * start a prefix of a valid UTF byte sequence, - * we will fallback to a one-byte back step */ - const char *look = fallback; - /* Start search at the fallback position */ - - /* Quick boundary case exit. */ - if (fallback <= start) { - return start; - } - - do { - unsigned char byte = UCHAR(look[0]); - - if (byte < 0x80) { - /* - * Single byte character. Either this is a correct previous - * character, or it is followed by at least one trail byte - * which indicates a malformed sequence. In either case the - * correct result is to return the fallback. - */ - return fallback; - } - if (byte >= 0xC0) { - /* Non-trail byte; May be multibyte lead. */ - - if ((trailBytesSeen == 0) - /* - * We've seen no trailing context to use to check - * anything. From what we know, this non-trail byte - * is a prefix of a previous character, and accepting - * it (the fallback) is correct. - */ - - || (trailBytesSeen >= complete[byte])) { - /* - * That is, (1 + trailBytesSeen > needed). - * We've examined more bytes than needed to complete - * this lead byte. No matter about well-formedness or - * validity, the sequence starting with this lead byte - * will never include the fallback location, so we must - * return the fallback location. See test utf-7.17 - */ - return fallback; - } - - /* - * trailBytesSeen > 0, so we can examine look[1] safely. - * Use that capability to screen out invalid sequences. - */ - - if (Invalid(look)) { - /* Reject */ - return fallback; - } - return (const char *)look; - } - - /* We saw a trail byte. */ - trailBytesSeen++; - - if ((const char *)look == start) { - /* - * Do not read before the start of the string - * - * If we get here, we've examined bytes at every location - * >= start and < src and all of them are trail bytes, - * including (*start). We need to return our fallback - * and exit this loop before we run past the start of the string. - */ - return fallback; - } - - /* Continue the search backwards... */ + const char *src, /* The current location in the string. */ + const char *start) /* Pointer to the beginning of the string, to + * avoid going backwards too far. */ +{ + const char *look; + int i, byte; + + look = --src; + for (i = 0; i < 4; i++) { + if (look < start) { + if (src < start) { + src = start; + } + break; + } + byte = UCHAR(*look); + if ((byte & 0xC0) != 0x80) { + if (look + totalBytes[UCHAR(byte)] < src) { + src = look + totalBytes[UCHAR(byte)]; + break; + } + return look + ((i > 0) && (totalBytes[UCHAR(byte)] == 1)); + } look--; - } while (trailBytesSeen < TCL_UTF_MAX); - - /* - * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a - * properly formed byte sequence to find, and we can stop looking, - * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as - * far as we can. - */ -#if TCL_UTF_MAX > 3 - return fallback; -#else - return src - TCL_UTF_MAX; -#endif + } + return src; } /* *--------------------------------------------------------------------------- * Index: tests/encoding.test ================================================================== --- tests/encoding.test +++ tests/encoding.test @@ -36,11 +36,12 @@ testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] testConstraint exec [llength [info commands exec]] testConstraint testgetencpath [llength [info commands testgetencpath]] - +testConstraint tip389 [expr {[string length [format %c 0x10000]] eq 2}] + # TclInitEncodingSubsystem is tested by the rest of this file # TclFinalizeEncodingSubsystem is not currently tested test encoding-1.1 {Tcl_GetEncoding: system encoding} -setup { set old [encoding system] @@ -323,76 +324,76 @@ } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y -} -result "6 \U1F602" +} -result "6 \uFFFD\uFFFD" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] list [string length $x] $y } "4 \U1F602" -test encoding-15.6 {UtfToUtfProc emoji character output} { +test encoding-15.6 {UtfToUtfProc emoji character output} tip389 { set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z -} {10 edb882f09f9882eda0bd} +} {10 efbfbdf09f9882efbfbd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z -} {3 9 edb882eda0bdeda0bd} +} {3 9 efbfbdefbfbdefbfbd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\xE9 set y [encoding convertto utf-8 \uDE02\uD83D\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} {3 8 edb882eda0bdc3a9} +} {3 8 efbfbdefbfbdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX set y [encoding convertto utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z -} {3 7 edb882eda0bd58} +} {3 7 efbfbdefbfbd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02\xE9 set y [encoding convertto utf-8 \uDE02\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} {2 5 edb882c3a9} +} {2 5 efbfbdc3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02\xE9 set y [encoding convertto utf-8 \uDA02\xE9] binary scan $y H* z list [string length $x] [string length $y] $z -} {2 5 eda882c3a9} +} {2 5 efbfbdc3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y set y [encoding convertto utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z -} {2 4 edb88259} +} {2 4 efbfbd59} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y set y [encoding convertto utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z -} {2 4 eda88259} +} {2 4 efbfbd59} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 set y [encoding convertto utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z -} {1 3 edb882} +} {1 3 efbfbd} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 set y [encoding convertto utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z -} {1 3 eda882} +} {1 3 efbfbd} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" @@ -410,13 +411,13 @@ test encoding-16.2 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xDC\xDC"] + set val [encoding convertfrom utf-16 "\xD4\xD4"] list $val [format %x [scan $val %c]] -} -result "\uDCDC dcdc" +} -result "\uD4D4 d4d4" test encoding-16.4 {Ucs2ToUtfProc} -body { set val [encoding convertfrom ucs-2 NN] list $val [format %x [scan $val %c]] } -result "\u4E4E 4e4e" test encoding-16.4 {Ucs2ToUtfProc} -body { @@ -426,15 +427,15 @@ test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" test encoding-17.2 {UtfToUtf16Proc} -body { - encoding convertto utf-16 "\uDCDC" -} -result "\xDC\xDC" + encoding convertto utf-16 "\uD4D4" +} -result "\xD4\xD4" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto utf-16 "\uD8D8" -} -result "\xD8\xD8" + encoding convertto utf-16 "\uD0D0" +} -result "\xD0\xD0" test encoding-17.4 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-18.1 {TableToUtfProc} { Index: tests/utf.test ================================================================== --- tests/utf.test +++ tests/utf.test @@ -19,11 +19,10 @@ testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] testConstraint ucs4 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] -testConstraint ucs2_utf16 [expr {![testConstraint ucs4]}] testConstraint Uesc [expr {"\U0041" eq "A"}] testConstraint pre388 [expr {"\x741" eq "A"}] testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] && [string length [teststringbytes \uD83D\uDCA9]] == 4}] @@ -51,26 +50,26 @@ } 1 test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} testbytestring { expr {"\u4E4E" eq [testbytestring \xE4\xB9\x8E]} } 1 test utf-1.5 {Tcl_UniCharToUtf: overflowed Tcl_UniChar} testbytestring { - expr {[format %c 0x110000] eq [testbytestring \xEF\xBF\xBD]} + expr {[format %c 0x110000] eq "\uFFFD"} } 1 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { - expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} + expr {[format %c -1] eq "\uFFFD"} } 1 test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf Uesc testbytestring} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {ucs2 Uesc testbytestring} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { - expr {"\uD842" eq [testbytestring \xED\xA1\x82]} + expr {"\uD842" eq "\uFFFD"} } 1 test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { - expr {"\uDC42" eq [testbytestring \xED\xB1\x82]} + expr {"\uDC42" eq "\uFFFD"} } 1 test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { expr {[format %c 0xD842] eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { @@ -78,12 +77,12 @@ } 1 test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc testbytestring} { - expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} -} 1 + expr {"\uD842" eq "\uFFFD"} + } 1 test utf-2.1 {Tcl_UtfToUniChar: low ascii} { string length "abc" } 3 test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring { @@ -102,22 +101,28 @@ string length [testbytestring \xE2\xA2] } 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring \xE4\xB9\x8E] } 1 -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2_utf16} { +test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring \xF0\x90\x80\x80] -} 2 +} 4 test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs4} { string length [testbytestring \xF0\x90\x80\x80] } 1 +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring utf16} { + string length [testbytestring \xF0\x90\x80\x80] +} 2 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {testbytestring ucs2} { string length [testbytestring \xF4\x8F\xBF\xBF] -} 2 +} 4 test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {Uesc ucs4} { string length \U10FFFF } 1 +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { + string length \uDBFF\uDFFF +} 2 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring \xF0\x8F\xBF\xBF] } 4 test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { # Would decode to U+110000 but that is outside the Unicode range. @@ -143,11 +148,11 @@ testnumutfchars \x00 } 1 test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { testnumutfchars "" 0 } 0 -test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {testnumutfchars testbytestring} { +test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { testnumutfchars \xA2 end } 1 test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {testnumutfchars testbytestring} { testnumutfchars abc\xA2[testbytestring \xE4\xB9\x8E\xA2\x4E] end } 7 @@ -164,14 +169,17 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 } 3 test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end -} 2 +} 4 test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 1 +test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end +} 2 test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end } 8 test utf-4.14 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { testnumutfchars [testbytestring \xF4\x90\x80\x80] end-1 @@ -394,11 +402,11 @@ testutfnext \u8820[testbytestring \xF8] } 3 test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0]G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 1 test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 4 @@ -412,41 +420,41 @@ testutfnext [testbytestring \xF2\xA0\xA0\xF2] } 1 test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xF8] } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 1 test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0]G } 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 1 test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] } 4 -test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 1 test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] } 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 1 test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] } 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 1 test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] } 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { +test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 1 test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 4 @@ -453,27 +461,30 @@ test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \x00 } 2 test utf-6.81 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xC0\x81] -} 1 +} 2 test utf-6.82 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xC1\x80] -} 1 +} 2 test utf-6.83 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xC2\x80] } 2 test utf-6.84 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xE0\x80\x80] -} 1 +} 3 test utf-6.85 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xE0\xA0\x80] } 3 -test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { +test utf-6.86.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring utf16} { + testutfnext [testbytestring \xF0\x80\x80\x80] +} 1 +test utf-6.86.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF0\x80\x80\x80] -} 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2_utf16} { +} 4 +test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring utf16} { testutfnext [testbytestring \xF0\x90\x80\x80] } 1 test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF0\x90\x80\x80] } 4 @@ -481,19 +492,22 @@ testutfnext [testbytestring \xA0\xA0\x00] } 2 test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \x80\x80\x00] } 2 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2_utf16} { - testutfnext [testbytestring \xF4\x8F\xBF\xBF] +test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext utf16} { + testutfnext \uDBFF\uDFFF } 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} { - testutfnext [testbytestring \xF4\x8F\xBF\xBF] +test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext ucs4} { + testutfnext \U10FFFF } 4 -test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { +test utf-6.91.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring utf16} { + testutfnext [testbytestring \xF4\x90\x80\x80] +} 1 +test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF4\x90\x80\x80] -} 1 +} 4 test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xA0\xA0] } 3 test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \x80\x80\x80] @@ -566,26 +580,26 @@ testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 3 } 2 test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0] -} 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0] -} 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 -} 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 -} 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 -} 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0] +} 2 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0] +} 1 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 +} 2 +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 +} 1 +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 +} 2 +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xA0] } 1 @@ -607,42 +621,42 @@ test utf-7.12.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xF8\xA0] 3 } 1 test utf-7.13 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0] -} 2 +} 1 test utf-7.13.1 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 3 -} 2 +} 1 test utf-7.13.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xF8\xA0] 3 -} 2 +} 1 test utf-7.14 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0] -} 3 +} 2 test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 4 -} 3 +} 2 test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 -} 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0] -} 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0] -} 1 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 -} 3 -test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 -} 1 -test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 -} 3 -test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +} 2 +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0] +} 2 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0] +} 1 +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 +} 2 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 +} 1 +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 +} 2 +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev A\u8820 } 1 @@ -652,80 +666,92 @@ test utf-7.16.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A\u8820[testbytestring \xF8] 4 } 1 test utf-7.17 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0] -} 3 +} 1 test utf-7.17.1 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] 4 -} 3 +} 1 test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4 -} 3 -test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +} 1 +test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xA0] } 1 -test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 1 -test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 1 -test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 2 -test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] +} 2 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] -} 2 -test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +} 1 +test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A\u8820[testbytestring \xA0] -} 2 -test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +} 1 +test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] -} 2 -test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { +} 3 +test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] -} 2 +} 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xC0\x81] -} 2 +} 1 test utf-7.25 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xC0\x81] 2 } 1 test utf-7.26 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\x80\x80] -} 3 +} 1 test utf-7.27 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\x80] -} 2 +} 1 test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\x80\x80] 3 -} 2 +} 1 test utf-7.28 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0] } 1 test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\x80\x80] 2 } 1 -test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { +test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x80\x80\x80] +} 2 +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] +} 1 +test utf-7.30.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 4 } 2 -test utf-7.30 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { +test utf-7.30.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] 4 -} 3 -test utf-7.31 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { +} 1 +test utf-7.31.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x80\x80\x80] 3 +} 2 +test utf-7.31.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] 3 -} 2 +} 1 test utf-7.32 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x80\x80\x80] 2 } 1 test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\x00 } 1 test utf-7.34 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xC1\x80] -} 2 +} 1 test utf-7.35 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xC2\x80] } 1 test utf-7.36 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\xA0\x80] @@ -734,23 +760,26 @@ testutfprev A[testbytestring \xE0\xA0\x80] 3 } 1 test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\xA0\x80] 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { +test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF0\x90\x80\x80] +} 2 +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] -} 2 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +} 1 +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 -} 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +} 2 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x90\x80\x80] 2 } 1 @@ -761,50 +790,65 @@ testutfprev [testbytestring \xA0\xA0] } 1 test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring} { testutfprev [testbytestring \xA0\xA0\xA0] } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} { +test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} { + testutfprev [testbytestring \xA0\xA0\xA0\xA0] +} 3 +test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs4} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] -} 1 +} 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { testutfprev [testbytestring \xE8\xA0] } 0 -test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} testutfprev { +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev testbytestring} { testutfprev \u8820 2 } 0 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} { testutfprev [testbytestring \xE8\xA0\x00] 2 } 0 test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] -} 2 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 -} 3 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 -} 1 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 -} 2 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 -} 1 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2 + testutfprev A\uDBFF\uDFFF +} 2 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF +} 1 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { + testutfprev A\uDBFF\uDFFF 4 +} 2 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF 4 +} 1 +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { + testutfprev A\uDBFF\uDFFF 3 +} 2 +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A\U10FFFF 3 +} 1 +test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A\U10FFFF 2 } 1 test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 2 -test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { +test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF4\x90\x80\x80] +} 1 +test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 4 +} 2 +test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x90\x80\x80] 4 -} 3 -test utf-7.49.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { +} 1 +test utf-7.49.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF4\x90\x80\x80] 3 } 2 -test utf-7.49.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { +test utf-7.49.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { + testutfprev A[testbytestring \xF4\x90\x80\x80] 3 +} 1 +test utf-7.49.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { testutfprev A[testbytestring \xF4\x90\x80\x80] 2 } 1 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 @@ -815,11 +859,11 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} \xFF +} \uFF test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 @@ -833,29 +877,29 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 } \uD83D test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 -} \U1F600 +} \uFFFD test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 } \uDE00 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 -} G +} \uFFFD test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 2 } G test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 2 -} {} +} G test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { string index \uD83D\uDE00G 2 } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 0 @@ -894,29 +938,29 @@ test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 } \uD83D test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { string range \uD83D\uDE00G 0 0 -} \U1F600 +} \uFFFD test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { string range \uD83D\uDE00G 0 0 } \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 } \uDE00 test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 1 1 -} G +} \uFFFD test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { string range \uD83D\uDE00G 1 1 } {} test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 2 2 } G test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 2 2 -} {} +} G test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { string range \uD83D\uDE00G 2 2 } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { string range \U1f600G 0 0 @@ -1304,20 +1348,11 @@ UniCharCaseCmpTest < \uFFFF \U10000 {Uesc ucs4} UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} -test utf-26.1 {Tcl_UniCharDString} -setup { - testobj freeallvars -} -constraints {teststringobj testbytestring} -cleanup { - testobj freeallvars -} -body { - teststringobj set 1 foo - teststringobj maxchars 1 - teststringobj append 1 [testbytestring barsoom\xF2\xC2\x80] 10 - scan [string index [teststringobj get 1] 11] %c -} -result 128 + unset count rename UniCharCaseCmpTest {}