Index: .github/workflows/linux-build.yml ================================================================== --- .github/workflows/linux-build.yml +++ .github/workflows/linux-build.yml @@ -14,11 +14,10 @@ runs-on: ubuntu-22.04 strategy: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" - "--enable-symbols=mem" - "--enable-symbols=all" Index: .github/workflows/win-build.yml ================================================================== --- .github/workflows/win-build.yml +++ .github/workflows/win-build.yml @@ -59,11 +59,10 @@ working-directory: win strategy: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" - "--enable-symbols=mem" - "--enable-symbols=all" Index: .travis.yml ================================================================== --- .travis.yml +++ .travis.yml @@ -18,17 +18,10 @@ os: linux dist: focal compiler: gcc env: - BUILD_DIR=unix - - name: "Linux/GCC/Shared: UTF_MAX=4" - os: linux - dist: focal - compiler: gcc - env: - - BUILD_DIR=unix - - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4 - name: "Linux/GCC/Shared: NO_DEPRECATED" os: linux dist: focal compiler: gcc env: @@ -213,19 +206,10 @@ - cd ${BUILD_DIR} install: [] script: - cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc test - - name: "Windows/MSVC/Shared: UTF_MAX=4" - os: windows - compiler: cl - env: *vcenv - before_install: *vcpreinst - install: [] - script: - - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utf16' '-f' makefile.vc all tcltest - - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utf16' '-f' makefile.vc test - name: "Windows/MSVC/Shared: NO_DEPRECATED" os: windows compiler: cl env: *vcenv before_install: *vcpreinst @@ -268,19 +252,10 @@ before_install: *vcpreinst install: [] script: - cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc test - - name: "Windows/MSVC-x86/Shared: UTF_MAX=4" - os: windows - compiler: cl - env: *vcenv - before_install: *vcpreinst - install: [] - script: - - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utf16' '-f' makefile.vc all tcltest - - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utf16' '-f' makefile.vc test - name: "Windows/MSVC-x86/Shared: NO_DEPRECATED" os: windows compiler: cl env: *vcenv before_install: *vcpreinst @@ -324,17 +299,10 @@ - CFGOPT="--enable-64bit" before_install: &makepreinst - touch generic/tclStubInit.c generic/tclOOStubInit.c generic/tclOOScript.h - choco install -y make zip - cd ${BUILD_DIR} - - name: "Windows/GCC/Shared: UTF_MAX=4" - os: windows - compiler: gcc - env: - - BUILD_DIR=win - - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=4" - before_install: *makepreinst - name: "Windows/GCC/Shared: NO_DEPRECATED" os: windows compiler: gcc env: - BUILD_DIR=win @@ -366,17 +334,10 @@ os: windows compiler: gcc env: - BUILD_DIR=win before_install: *makepreinst - - name: "Windows/GCC-x86/Shared: UTF_MAX=4" - os: windows - compiler: gcc - env: - - BUILD_DIR=win - - CFGOPT="CFLAGS=-DTCL_UTF_MAX=4" - before_install: *makepreinst - name: "Windows/GCC-x86/Shared: NO_DEPRECATED" os: windows compiler: gcc env: - BUILD_DIR=win Index: generic/regcustom.h ================================================================== --- generic/regcustom.h +++ generic/regcustom.h @@ -86,19 +86,13 @@ typedef unsigned uchr; /* Unsigned type that will hold a chr. */ typedef int celt; /* Type to hold chr, or NOCELT */ #define NOCELT (-1) /* Celt value which is not valid chr */ #define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */ -#if TCL_UTF_MAX > 3 #define CHRBITS 32 /* Bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */ #define CHR_MAX 0x10FFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */ -#else -#define CHRBITS 16 /* Bits in a chr; must not use sizeof */ -#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */ -#define CHR_MAX 0xFFFF /* CHR_MAX-CHR_MIN+1 should fit in uchr */ -#endif /* * Functions operating on chr. */ Index: generic/tclBinary.c ================================================================== --- generic/tclBinary.c +++ generic/tclBinary.c @@ -433,11 +433,11 @@ int ucs4; irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); - TclUtfToUCS4(nonbyte, &ucs4); + Tcl_UtfToUniChar(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "expected byte sequence but character %d " "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL); @@ -472,11 +472,11 @@ int ucs4; irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); - TclUtfToUCS4(nonbyte, &ucs4); + Tcl_UtfToUniChar(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "expected byte sequence but character %d " "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "BYTES", NULL); @@ -2716,11 +2716,11 @@ badChar: if (pure) { ucs4 = c; } else { - TclUtfToUCS4((const char *)(data - 1), &ucs4); + Tcl_UtfToUniChar((const char *)(data - 1), &ucs4); } TclDecrRefCount(resultObj); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid hexadecimal digit \"%c\" (U+%06X) at position %d", ucs4, ucs4, (int) (data - datastart - 1))); @@ -3173,11 +3173,11 @@ badUu: if (pure) { ucs4 = c; } else { - TclUtfToUCS4((const char *)(data - 1), &ucs4); + Tcl_UtfToUniChar((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid uuencode character \"%c\" (U+%06X) at position %d", ucs4, ucs4, (int) (data - datastart - 1))); Tcl_SetErrorCode(interp, "TCL", "BINARY", "DECODE", "INVALID", NULL); @@ -3347,11 +3347,11 @@ /* The decoder is byte-oriented. If we saw a byte that's not a * valid member of the base64 alphabet, it could be the lead byte * of a multi-byte character. */ /* Safe because we know data is NUL-terminated */ - TclUtfToUCS4((const char *)(data - 1), &ucs4); + Tcl_UtfToUniChar((const char *)(data - 1), &ucs4); } Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid base64 character \"%c\" (U+%06X) at position %d", ucs4, ucs4, (int) (data - datastart - 1))); Index: generic/tclCmdIL.c ================================================================== --- generic/tclCmdIL.c +++ generic/tclCmdIL.c @@ -5429,12 +5429,12 @@ * string is at the terminating null, do a byte-wise comparison and * bail out immediately. */ if ((*left != '\0') && (*right != '\0')) { - left += TclUtfToUCS4(left, &uniLeft); - right += TclUtfToUCS4(right, &uniRight); + left += Tcl_UtfToUniChar(left, &uniLeft); + right += Tcl_UtfToUniChar(right, &uniRight); /* * Convert both chars to lower for the comparison, because * dictionary sorts are case-insensitive. Covert to lower, not * upper, so chars between Z and a will sort before A (where most Index: generic/tclCmdMZ.c ================================================================== --- generic/tclCmdMZ.c +++ generic/tclCmdMZ.c @@ -1215,11 +1215,11 @@ */ Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); for ( ; stringPtr < end; stringPtr += len) { - len = TclUtfToUCS4(stringPtr, &ch); + len = Tcl_UtfToUniChar(stringPtr, &ch); hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(ch), &isNew); if (isNew) { TclNewStringObj(objPtr, stringPtr, len); /* @@ -1261,13 +1261,13 @@ */ splitEnd = splitChars + splitCharLen; for (element = stringPtr; stringPtr < end; stringPtr += len) { - len = TclUtfToUCS4(stringPtr, &ch); + len = Tcl_UtfToUniChar(stringPtr, &ch); for (p = splitChars; p < splitEnd; p += splitLen) { - splitLen = TclUtfToUCS4(p, &splitChar); + splitLen = Tcl_UtfToUniChar(p, &splitChar); if (ch == splitChar) { TclNewStringObj(objPtr, element, stringPtr - element); Tcl_ListObjAppendElement(NULL, listPtr, objPtr); element = stringPtr + len; break; @@ -1893,11 +1893,11 @@ } end = string1 + length1; for (; string1 < end; string1 += length2, failat++) { int ucs4; - length2 = TclUtfToUCS4(string1, &ucs4); + length2 = Tcl_UtfToUniChar(string1, &ucs4); if (!chcomp(ucs4)) { result = 0; break; } } @@ -2515,23 +2515,24 @@ } cur = 0; if (index > 0) { p = &string[index]; - (void)TclUniCharToUCS4(p, &ch); + ch = *p; for (cur = index; cur >= 0; cur--) { int delta = 0; const Tcl_UniChar *next; if (!Tcl_UniCharIsWordChar(ch)) { break; } - next = TclUCS4Prev(p, string); + next = (p > string) ? p - 1 : p; do { next += delta; - delta = TclUniCharToUCS4(next, &ch); + ch = *next; + delta = 1; } while (next + delta < p); p = next; } if (cur != index) { cur += 1; @@ -2585,11 +2586,11 @@ } if (index < length) { p = &string[index]; end = string+length; for (cur = index; p < end; cur++) { - p += TclUniCharToUCS4(p, &ch); + ch = *p++; if (!Tcl_UniCharIsWordChar(ch)) { break; } } if (cur == index) { @@ -2929,11 +2930,11 @@ } else { int first, last; const char *start, *end; Tcl_Obj *resultPtr; - length1 = Tcl_NumUtfChars(string1, length1) - 1; + length1 = TclNumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; @@ -3014,11 +3015,11 @@ } else { int first, last; const char *start, *end; Tcl_Obj *resultPtr; - length1 = Tcl_NumUtfChars(string1, length1) - 1; + length1 = TclNumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; @@ -3099,11 +3100,11 @@ } else { int first, last; const char *start, *end; Tcl_Obj *resultPtr; - length1 = Tcl_NumUtfChars(string1, length1) - 1; + length1 = TclNumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; Index: generic/tclCompExpr.c ================================================================== --- generic/tclCompExpr.c +++ generic/tclCompExpr.c @@ -2144,17 +2144,17 @@ * have no direct relevance here. */ if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfToUCS4(start, &ch); + scanned = Tcl_UtfToUniChar(start, &ch); } else { char utfBytes[8]; memcpy(utfBytes, start, numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfToUCS4(utfBytes, &ch); + scanned = Tcl_UtfToUniChar(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); return scanned; } Index: generic/tclDisassemble.c ================================================================== --- generic/tclDisassemble.c +++ generic/tclDisassemble.c @@ -873,11 +873,11 @@ Tcl_AppendToObj(appendObj, "\"", -1); p = stringPtr; for (; (*p != '\0') && (i < maxChars); p+=len) { int ucs4; - len = TclUtfToUCS4(p, &ucs4); + len = Tcl_UtfToUniChar(p, &ucs4); switch (ucs4) { case '"': Tcl_AppendToObj(appendObj, "\\\"", -1); i += 2; continue; @@ -1197,14 +1197,14 @@ * Convert byte offsets to character offsets; important if multibyte * characters are present in the source! */ Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptfrom", -1), - Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source, + Tcl_NewWideIntObj(TclNumUtfChars(codePtr->source, sourceOffset))); Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptto", -1), - Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source, + Tcl_NewWideIntObj(TclNumUtfChars(codePtr->source, sourceOffset + sourceLength - 1))); Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("script", -1), Tcl_NewStringObj(codePtr->source+sourceOffset, sourceLength)); Tcl_ListObjAppendElement(NULL, commands, cmd); } Index: generic/tclEncoding.c ================================================================== --- generic/tclEncoding.c +++ generic/tclEncoding.c @@ -1636,14 +1636,14 @@ */ *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { - Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); + Tcl_Size pos = TclNumUtfChars(srcStart, nBytesProcessed); int ucs4; char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); + Tcl_UtfToUniChar(&srcStart[nBytesProcessed], &ucs4); snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "u", nBytesProcessed); Tcl_SetObjResult( interp, Tcl_ObjPrintf( "unexpected character at index %" TCL_SIZE_MODIFIER @@ -2585,11 +2585,11 @@ } } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Incomplete byte sequence. - * Always check before using TclUtfToUCS4. Not doing can so + * Always check before using Tcl_UtfToUniChar. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves * unless the user has explicitly asked to be told. */ @@ -2607,17 +2607,17 @@ ++src; } else { /* TCL_ENCODING_PROFILE_TCL8 */ char chbuf[2]; chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); + Tcl_UtfToUniChar(chbuf, &ch); } dst += Tcl_UniCharToUtf(ch, dst); } else { int low; int isInvalid = 0; - size_t len = TclUtfToUCS4(src, &ch); + size_t len = Tcl_UtfToUniChar(src, &ch); if (flags & ENCODING_INPUT) { if ((len < 2) && (ch != 0)) { isInvalid = 1; } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) { isInvalid = 1; @@ -2655,11 +2655,11 @@ } if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } else { low = ch; - len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0; + len = (src <= srcEnd - 3) ? Tcl_UtfToUniChar(src, &low) : 0; if ((!LOW_SURROGATE(low)) || (ch & 0x400)) { if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_UNKNOWN; @@ -2922,11 +2922,11 @@ } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); if (SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3181,11 +3181,11 @@ } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); if (SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3290,31 +3290,18 @@ } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } -#if TCL_UTF_MAX < 4 - len = TclUtfToUniChar(src, &ch); - if ((ch >= 0xD800) && (len < 3)) { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_UNKNOWN; - break; - } - src += len; - src += TclUtfToUniChar(src, &ch); - ch = UNICODE_REPLACE_CHAR; - } -#else len = TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } ch = UNICODE_REPLACE_CHAR; } -#endif if (PROFILE_STRICT(flags) && SURROGATE(ch)) { result = TCL_CONVERT_SYNTAX; break; } @@ -3539,20 +3526,14 @@ result = TCL_CONVERT_MULTIBYTE; break; } len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 3 /* Unicode chars > +U0FFFF cannot be represented in any table encoding */ if (ch & 0xFFFF0000) { word = 0; } else -#else - if (!len) { - word = 0; - } else -#endif word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; @@ -3738,23 +3719,15 @@ /* * Check for illegal characters. */ if (ch > 0xFF -#if TCL_UTF_MAX < 4 - || ((ch >= 0xD800) && (len < 3)) -#endif ) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } -#if TCL_UTF_MAX < 4 - if ((ch >= 0xD800) && (len < 3)) { - len = 4; - } -#endif /* * Plunge on, using '?' as a fallback character. */ ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */ Index: generic/tclEvent.c ================================================================== --- generic/tclEvent.c +++ generic/tclEvent.c @@ -1117,13 +1117,10 @@ ".purify" #endif #ifdef STATIC_BUILD ".static" #endif -#if TCL_UTF_MAX < 4 - ".utf-16" -#endif }}; const char * Tcl_InitSubsystems(void) { Index: generic/tclExecute.c ================================================================== --- generic/tclExecute.c +++ generic/tclExecute.c @@ -5726,11 +5726,11 @@ match = 1; if (length > 0) { int ch; end = ustring1 + length; for (p=ustring1 ; p 3 -# define TclUtfToUCS4 Tcl_UtfToUniChar -# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) -# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src)) -#else - MODULE_SCOPE int TclUtfToUCS4(const char *, int *); - MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *); - MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *); -#endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(void *clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, Tcl_LoadHandle *loadHandle, Tcl_FSUnloadFileProc **unloadProcPtr, int flags); @@ -3498,47 +3489,16 @@ const char *msg, Tcl_Size length); /* Tip 430 */ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); -#if TCL_UTF_MAX > 3 - MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); - MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); - MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); - MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); - MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); - MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); -# undef Tcl_NumUtfChars -# define Tcl_NumUtfChars TclNumUtfChars -# undef Tcl_GetCharLength -# define Tcl_GetCharLength TclGetCharLength -# undef Tcl_UtfAtIndex -# define Tcl_UtfAtIndex TclUtfAtIndex -# undef Tcl_GetRange -# define Tcl_GetRange TclGetRange -# undef Tcl_GetUniChar -# define Tcl_GetUniChar TclGetUniChar -#else -# define tclUniCharStringType tclStringType -# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj -# define TclNewUnicodeObj Tcl_NewUnicodeObj -# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj -# define TclUniCharNcasecmp Tcl_UniCharNcasecmp -# define TclUniCharCaseMatch Tcl_UniCharCaseMatch -# define TclUniCharNcmp Tcl_UniCharNcmp -# undef TclNumUtfChars -# define TclNumUtfChars Tcl_NumUtfChars -# undef TclGetCharLength -# define TclGetCharLength Tcl_GetCharLength -# undef TclUtfAtIndex -# define TclUtfAtIndex Tcl_UtfAtIndex -# undef TclGetRange -# define TclGetRange Tcl_GetRange -# undef TclGetUniChar -# define TclGetUniChar Tcl_GetUniChar -#endif - +MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); +MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); +MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); +MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); +MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); +MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); /* * Many parsing tasks need a common definition of whitespace. * Use this routine and macro to achieve that and place * optimization (fragile on changes) in one place. @@ -4733,21 +4693,14 @@ * * MODULE_SCOPE int TclUtfToUniChar(const char *string, Tcl_UniChar *ch); *---------------------------------------------------------------- */ -#if TCL_UTF_MAX > 3 #define TclUtfToUniChar(str, chPtr) \ (((UCHAR(*(str))) < 0x80) ? \ ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToUniChar(str, chPtr)) -#else -#define TclUtfToUniChar(str, chPtr) \ - (((UCHAR(*(str))) < 0x80) ? \ - ((*(chPtr) = UCHAR(*(str))), 1) \ - : Tcl_UtfToChar16(str, chPtr)) -#endif /* *---------------------------------------------------------------- * Macro counterpart of the Tcl_NumUtfChars() function. To be used in speed- * -sensitive points where it pays to avoid a function call in the common case Index: generic/tclObj.c ================================================================== --- generic/tclObj.c +++ generic/tclObj.c @@ -385,14 +385,12 @@ Tcl_InitHashTable(&typeTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&tableMutex); Tcl_RegisterObjType(&tclByteArrayType); Tcl_RegisterObjType(&tclDoubleType); -#if (TCL_UTF_MAX < 4) || !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) Tcl_RegisterObjType(&tclStringType); -#endif -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) /* Only registered for 8.7, not for 9.0 any more. * See [https://core.tcl-lang.org/tk/tktview/6b49149b4e] */ Tcl_RegisterObjType(&tclUniCharStringType); #endif Tcl_RegisterObjType(&tclListType); Index: generic/tclParse.c ================================================================== --- generic/tclParse.c +++ generic/tclParse.c @@ -934,17 +934,17 @@ * special, we shouldn't break up a correct utf-8 character. [Bug * #217987] test subst-3.2 */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */ + count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { char utfBytes[8]; memcpy(utfBytes, p, numBytes - 1); utfBytes[numBytes - 1] = '\0'; - count = TclUtfToUCS4(utfBytes, &unichar) + 1; + count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1; } result = unichar; break; } Index: generic/tclScan.c ================================================================== --- generic/tclScan.c +++ generic/tclScan.c @@ -877,11 +877,11 @@ case 'c': /* * Scan a single Unicode character. */ - offset = TclUtfToUCS4(string, &i); + offset = Tcl_UtfToUniChar(string, &i); string += offset; if (!(flags & SCAN_SUPPRESS)) { TclNewIntObj(objPtr, i); Tcl_IncrRefCount(objPtr); CLANG_ASSERT(objs); Index: generic/tclStringObj.c ================================================================== --- generic/tclStringObj.c +++ generic/tclStringObj.c @@ -67,11 +67,11 @@ static Tcl_Size UnicodeLength(const Tcl_UniChar *unicode); #if !defined(TCL_NO_DEPRECATED) static int UTF16Length(const unsigned short *unicode); #endif static void UpdateStringOfString(Tcl_Obj *objPtr); -#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); #endif @@ -87,34 +87,10 @@ /* * The structure below defines the string Tcl object type by means of * functions that can be invoked by generic object code. */ -#if TCL_UTF_MAX < 4 - -#define tclUniCharStringType tclStringType -#define GET_UNICHAR_STRING GET_STRING -#define UniCharString String -#define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS -#define uniCharStringAlloc stringAlloc -#define uniCharStringRealloc stringRealloc -#define uniCharStringAttemptAlloc stringAttemptAlloc -#define uniCharStringAttemptRealloc stringAttemptRealloc -#define uniCharStringCheckLimits stringCheckLimits -#define SET_UNICHAR_STRING SET_STRING -#define UNICHAR_STRING_SIZE STRING_SIZE - -const Tcl_ObjType tclStringType = { - "string", /* name */ - FreeStringInternalRep, /* freeIntRepPro */ - DupStringInternalRep, /* dupIntRepProc */ - UpdateStringOfString, /* updateStringProc */ - SetStringFromAny /* setFromAnyProc */ -}; - -#else - #ifndef TCL_NO_DEPRECATED const Tcl_ObjType tclStringType = { "string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ DupUTF16StringInternalRep, /* dupIntRepProc */ @@ -247,12 +223,10 @@ bytes[Tcl_DStringLength(&ds)] = 0; objPtr->bytes = bytes; objPtr->length = Tcl_DStringLength(&ds); Tcl_DStringFree(&ds); } -#endif - #endif /* * TCL STRING GROWTH ALGORITHM * @@ -550,11 +524,11 @@ TclNewObj(objPtr); SetUnicodeObj(objPtr, unicode, numChars); return objPtr; } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) Tcl_Obj * Tcl_NewUnicodeObj( const unsigned short *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode @@ -652,11 +626,11 @@ stringPtr->numChars = numChars; } return numChars; } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) #undef Tcl_GetCharLength int Tcl_GetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ @@ -685,11 +659,11 @@ if (TclIsPureByteArray(objPtr)) { (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); } else { Tcl_GetString(objPtr); - numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); } return numChars; } #endif @@ -720,11 +694,11 @@ if (objPtr->bytes == &tclEmptyString) { return TCL_EMPTYSTRING_YES; } if (TclIsPureByteArray(objPtr) - && Tcl_GetCharLength(objPtr) == 0) { + && TclGetCharLength(objPtr) == 0) { return TCL_EMPTYSTRING_YES; } if (TclListObjIsCanonical(objPtr)) { TclListObjLengthM(NULL, objPtr, &length); @@ -758,11 +732,11 @@ * Fills unichar with the index'th Unicode character. * *---------------------------------------------------------------------- */ -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) #undef Tcl_GetUniChar int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ @@ -871,26 +845,10 @@ if (index >= stringPtr->numChars) { return -1; } ch = stringPtr->unicode[index]; -#if TCL_UTF_MAX < 4 - /* See: bug [11ae2be95dac9417] */ - if (SURROGATE(ch)) { - if (ch & 0x400) { - if ((index > 0) - && HIGH_SURROGATE(stringPtr->unicode[index-1])) { - ch = -1; /* low surrogate preceded by high surrogate */ - } - } else if ((++index < stringPtr->numChars) - && LOW_SURROGATE(stringPtr->unicode[index])) { - /* high surrogate followed by low surrogate */ - ch = (((ch & 0x3FF) << 10) | - (stringPtr->unicode[index] & 0x3FF)) + 0x10000; - } - } -#endif return ch; } /* *---------------------------------------------------------------------- @@ -964,11 +922,11 @@ *lengthPtr = stringPtr->numChars; } return stringPtr->unicode; } -#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) unsigned short * Tcl_GetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the Unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string @@ -996,15 +954,11 @@ * rep's unichar length should be stored. If * NULL, no length is stored. */ { String *stringPtr; -#if TCL_UTF_MAX > 3 SetUTF16StringFromAny(NULL, objPtr); -#else - SetStringFromAny(NULL, objPtr); -#endif stringPtr = GET_STRING(objPtr); if (lengthPtr != NULL) { *(ptrdiff_t *)lengthPtr = stringPtr->numChars; } @@ -1030,11 +984,11 @@ * Changes the internal rep of "objPtr" to the String type. * *---------------------------------------------------------------------- */ -#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) #undef Tcl_GetRange Tcl_Obj * Tcl_GetRange( Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ int first, /* First index of the range. */ @@ -1063,11 +1017,11 @@ return newObjPtr; } return Tcl_NewByteArrayObj(bytes + first, last - first + 1); } - int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + int numChars = TclNumUtfChars(objPtr->bytes, objPtr->length); if (last < 0 || last >= numChars) { last = numChars - 1; } if (last < first) { @@ -1154,22 +1108,10 @@ } if (last < first) { TclNewObj(newObjPtr); return newObjPtr; } -#if TCL_UTF_MAX < 4 - /* See: bug [11ae2be95dac9417] */ - if ((first > 0) && LOW_SURROGATE(stringPtr->unicode[first]) - && HIGH_SURROGATE(stringPtr->unicode[first-1])) { - ++first; - } - if ((last + 1 < stringPtr->numChars) - && LOW_SURROGATE(stringPtr->unicode[last+1]) - && HIGH_SURROGATE(stringPtr->unicode[last])) { - ++last; - } -#endif return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* *---------------------------------------------------------------------- @@ -1724,11 +1666,11 @@ } else { AppendUnicodeToUtfRep(objPtr, unicode, length); } } -#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) void Tcl_AppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ const unsigned short *unicode, /* The unicode string to append to the * object. */ @@ -3949,11 +3891,11 @@ memCmpFn = memcmp; } else { s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL); s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL); if ( -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) +#if defined(WORDS_BIGENDIAN) 1 #else checkEq #endif ) { @@ -4022,12 +3964,12 @@ */ if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t)(void *)TclpUtfNcmp2; } else { - s1len = Tcl_NumUtfChars(s1, s1len); - s2len = Tcl_NumUtfChars(s2, s2len); + s1len = TclNumUtfChars(s1, s1len); + s2len = TclNumUtfChars(s2, s2len); memCmpFn = (memCmpFn_t)(void *) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } } } @@ -4309,13 +4251,10 @@ int flags) { UniCharString *stringPtr; Tcl_UniChar ch = 0; int inPlace = flags & TCL_STRING_IN_PLACE; -#if TCL_UTF_MAX < 4 - int needFlip = 0; -#endif if (TclIsPureByteArray(objPtr)) { Tcl_Size numBytes; unsigned char *from = Tcl_GetByteArrayFromObj(objPtr, &numBytes); @@ -4344,58 +4283,23 @@ objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); to = TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_UNICHAR_STRING(objPtr); while (--src >= from) { -#if TCL_UTF_MAX < 4 - ch = *src; - if (SURROGATE(ch)) { - needFlip = 1; - } - *to++ = ch; -#else *to++ = *src; -#endif } } else { /* * Reversing in place. */ -#if TCL_UTF_MAX < 4 - to = src; -#endif while (--src > from) { ch = *src; -#if TCL_UTF_MAX < 4 - if (SURROGATE(ch)) { - needFlip = 1; - } -#endif *src = *from; *from++ = ch; } } -#if TCL_UTF_MAX < 4 - if (needFlip) { - /* - * Flip back surrogate pairs. - */ - - from = to - stringPtr->numChars; - while (--to >= from) { - ch = *to; - if (HIGH_SURROGATE(ch)) { - if ((to-1 >= from) && LOW_SURROGATE(to[-1])) { - to[0] = to[-1]; - to[-1] = ch; - --to; - } - } - } - } -#endif } if (objPtr->bytes) { Tcl_Size numChars = stringPtr->numChars; Tcl_Size numBytes = objPtr->length; @@ -4425,11 +4329,11 @@ * NOTE: We know that the from buffer is NUL-terminated. It's * part of the contract for objPtr->bytes values. Thus, we can * skip calling Tcl_UtfCharComplete() here. */ - int bytesInChar = TclUtfToUCS4(from, &chw); + int bytesInChar = Tcl_UtfToUniChar(from, &chw); ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); to += bytesInChar; from += bytesInChar; @@ -4649,18 +4553,16 @@ numAppendChars = 0; } dst = stringPtr->unicode + numOrigChars; if (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); -#if TCL_UTF_MAX > 3 /* join upper/lower surrogate */ if (bytes && (stringPtr->unicode[numOrigChars - 1] | 0x3FF) == 0xDBFF && (unichar | 0x3FF) == 0xDFFF) { stringPtr->numChars--; unichar = ((stringPtr->unicode[numOrigChars - 1] & 0x3FF) << 10) + (unichar & 0x3FF) + 0x10000; dst--; } -#endif *dst++ = unichar; while (numAppendChars-- > 0) { bytes += TclUtfToUniChar(bytes, &unichar); *dst++ = unichar; } Index: generic/tclStubInit.c ================================================================== --- generic/tclStubInit.c +++ generic/tclStubInit.c @@ -48,12 +48,10 @@ #undef Tcl_SetUnicodeObj #undef Tcl_UniCharNcasecmp #undef Tcl_UniCharCaseMatch #undef Tcl_UniCharLen #undef Tcl_UniCharNcmp -#undef Tcl_GetRange -#undef Tcl_GetUniChar #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry #undef Tcl_CreateHashEntry #undef Tcl_Panic @@ -78,24 +76,19 @@ #undef Tcl_UtfToUniCharDString #undef Tcl_UtfToUniChar #undef Tcl_MacOSXOpenBundleResources #undef TclWinConvertWSAError #undef TclWinConvertError -#undef Tcl_NumUtfChars -#undef Tcl_GetCharLength -#undef Tcl_UtfAtIndex -#undef Tcl_GetRange -#undef Tcl_GetUniChar #undef TclObjInterpProc #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError #endif -#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED) +#if defined(TCL_NO_DEPRECATED) static void uniCodePanic(void) { Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)"); } # define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *))(void *)uniCodePanic # define Tcl_GetUnicodeFromObj (unsigned short *(*)(Tcl_Obj *, int *))(void *)uniCodePanic @@ -656,20 +649,10 @@ } } return result; } #define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj -#if TCL_UTF_MAX < 4 && !defined(TCL_NO_DEPRECATED) -static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ - return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n); -} -#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcmp -static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){ - return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n); -} -#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcasecmp -#endif static int utfNcmp(const char *s1, const char *s2, unsigned int n){ return Tcl_UtfNcmp(s1, s2, (unsigned long)n); } #define Tcl_UtfNcmp (int(*)(const char*,const char*,unsigned long))(void *)utfNcmp static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){ @@ -766,17 +749,10 @@ # define TclpLocaltime_unix 0 # define TclpGmtime_unix 0 # define Tcl_SetExitProc 0 # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 -#if TCL_UTF_MAX < 4 -# define Tcl_GetUnicode 0 -# define Tcl_AppendUnicodeToObj 0 -# define Tcl_UniCharCaseMatch 0 -# define Tcl_UniCharNcasecmp 0 -# define Tcl_UniCharNcmp 0 -#endif # undef Tcl_StringMatch # define Tcl_StringMatch 0 # define TclBN_reverse 0 # undef TclBN_s_mp_mul_digs_fast # define TclBN_s_mp_mul_digs_fast 0 Index: generic/tclTest.c ================================================================== --- generic/tclTest.c +++ generic/tclTest.c @@ -539,13 +539,10 @@ ".purify" #endif #ifdef STATIC_BUILD ".static" #endif -#if TCL_UTF_MAX < 4 - ".utf-16" -#endif ; int Tcltest_Init( Tcl_Interp *interp) /* Interpreter for application. */ Index: generic/tclUtf.c ================================================================== --- generic/tclUtf.c +++ generic/tclUtf.c @@ -406,11 +406,11 @@ * looking for trail bytes. If the source buffer is known to be '\0' * terminated, this cannot happen. Otherwise, the caller should call * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * - * If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done: + * Special handling of Surrogate pairs is done: * For any UTF-8 string containing a character outside of the BMP, the * first call to this function will fill *chPtr with the high surrogate * and generate a return value of 1. Calling Tcl_UtfToUniChar again * will produce the low surrogate and a return value of 3. Because *chPtr * is used to remember whether the high surrogate is already produced, it @@ -674,15 +674,15 @@ w = wString; p = src; endPtr = src + length; optPtr = endPtr - 4; while (p <= optPtr) { - p += TclUtfToUCS4(p, &ch); + p += Tcl_UtfToUniChar(p, &ch); *w++ = ch; } while ((p < endPtr) && Tcl_UtfCharComplete(p, endPtr-p)) { - p += TclUtfToUCS4(p, &ch); + p += Tcl_UtfToUniChar(p, &ch); *w++ = ch; } while (p < endPtr) { *w++ = UCHAR(*p++); } @@ -847,12 +847,11 @@ } } return i; } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) -#undef Tcl_NumUtfChars +#if !defined(TCL_NO_DEPRECATED) int Tcl_NumUtfChars( const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ @@ -927,11 +926,11 @@ Tcl_UtfFindFirst( const char *src, /* The UTF-8 string to be searched. */ int ch) /* The Unicode character to search for. */ { while (1) { - int find, len = TclUtfToUCS4(src, &find); + int find, len = Tcl_UtfToUniChar(src, &find); if (find == ch) { return src; } if (*src == '\0') { @@ -966,11 +965,11 @@ int ch) /* The Unicode character to search for. */ { const char *last = NULL; while (1) { - int find, len = TclUtfToUCS4(src, &find); + int find, len = Tcl_UtfToUniChar(src, &find); if (find == ch) { last = src; } if (*src == '\0') { @@ -1193,23 +1192,21 @@ } if ((ch >= 0xD800) && (i < 3)) { /* Index points at character following high Surrogate */ return -1; } - TclUtfToUCS4(src, &i); + Tcl_UtfToUniChar(src, &i); return i; } /* *--------------------------------------------------------------------------- * * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position in - * the UTF-8 string. If TCL_UTF_MAX < 4, characters > U+FFFF count as - * 2 positions, but then the pointer should never be placed between - * the two positions. + * the UTF-8 string. * * Results: * As above. * * Side effects: @@ -1216,15 +1213,10 @@ * None. * *--------------------------------------------------------------------------- */ -#if TCL_UTF_MAX < 4 -# undef Tcl_UtfToUniChar -# define Tcl_UtfToUniChar Tcl_UtfToChar16 -#endif - const char * TclUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { @@ -1233,21 +1225,14 @@ while (index-- > 0) { len = (Tcl_UtfToUniChar)(src, &ch); src += len; } -#if TCL_UTF_MAX < 4 - if ((ch >= 0xD800) && (len < 3)) { - /* Index points at character following high Surrogate */ - src += (Tcl_UtfToUniChar)(src, &ch); - } -#endif return src; } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) -#undef Tcl_UtfAtIndex +#if !defined(TCL_NO_DEPRECATED) const char * Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { @@ -1351,11 +1336,11 @@ * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); upChar = Tcl_UniCharToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case @@ -1404,11 +1389,11 @@ * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case @@ -1460,11 +1445,11 @@ */ src = dst = str; if (*src) { - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); if ((len < TclUtfCount(titleChar)) || ((titleChar & ~0x7FF) == 0xD800)) { memmove(dst, src, len); dst += len; @@ -1472,11 +1457,11 @@ dst += Tcl_UniCharToUtf(titleChar, dst); } src += len; } while (*src) { - len = TclUtfToUCS4(src, &ch); + len = Tcl_UtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } @@ -1579,20 +1564,10 @@ */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX < 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif return (ch1 - ch2); } } return 0; } @@ -1630,20 +1605,10 @@ * at least n chars long (no need for \0 check) */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX < 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { return (ch1 - ch2); } @@ -1679,20 +1644,10 @@ while (*cs && *ct) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX < 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif return ch1 - ch2; } } return UCHAR(*cs) - UCHAR(*ct); } @@ -1725,20 +1680,10 @@ while (*cs && *ct) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { -#if TCL_UTF_MAX < 4 - /* Surrogates always report higher than non-surrogates */ - if (((ch1 & 0xFC00) == 0xD800)) { - if ((ch2 & 0xFC00) != 0xD800) { - return ch1; - } - } else if ((ch2 & 0xFC00) == 0xD800) { - return -ch2; - } -#endif ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { return ch1 - ch2; } @@ -1933,11 +1878,11 @@ TclUniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) +#if defined(WORDS_BIGENDIAN) /* * We are definitely on a big-endian machine; memcmp() is safe */ return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); @@ -1954,18 +1899,18 @@ } return 0; #endif /* WORDS_BIGENDIAN */ } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) int Tcl_UniCharNcmp( const unsigned short *ucs, /* Unicode string to compare to uct. */ const unsigned short *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) +#if defined(WORDS_BIGENDIAN) /* * We are definitely on a big-endian machine; memcmp() is safe */ return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); @@ -2025,11 +1970,11 @@ } } return 0; } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) int Tcl_UniCharNcasecmp( const unsigned short *ucs, /* Unicode string to compare to uct. */ const unsigned short *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ @@ -2581,11 +2526,11 @@ uniStr++; uniPattern++; } } -#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#if !defined(TCL_NO_DEPRECATED) int Tcl_UniCharCaseMatch( const unsigned short *uniStr, /* Unicode String. */ const unsigned short *uniPattern, /* Pattern, which may contain special @@ -2942,77 +2887,12 @@ string++; pattern++; } } -/* - *--------------------------------------------------------------------------- - * - * TclUtfToUCS4 -- - * - * Extracts the 4-byte codepoint from the leading bytes of the - * Modified UTF-8 string "src". This is a utility routine to - * contain the surrogate gymnastics in one place. - * - * The caller must ensure that the source buffer is long enough that this - * routine does not run off the end and dereference non-existent memory - * looking for trail bytes. If the source buffer is known to be '\0' - * terminated, this cannot happen. Otherwise, the caller should call - * Tcl_UtfCharComplete() before calling this routine to ensure that - * enough bytes remain in the string. - * - * Results: - * Fills *usc4Ptr with the UCS4 code point and returns the number of bytes - * consumed from the source string. - * - * Side effects: - * None. - * - *--------------------------------------------------------------------------- - */ - -#if TCL_UTF_MAX < 4 -int -TclUtfToUCS4( - const char *src, /* The UTF-8 string. */ - int *ucs4Ptr) /* Filled with the UCS4 codepoint represented - * by the UTF-8 string. */ -{ -# undef Tcl_UtfToUniChar - return Tcl_UtfToUniChar(src, ucs4Ptr); -} - -int -TclUniCharToUCS4( - const Tcl_UniChar *src, /* The Tcl_UniChar string. */ - int *ucs4Ptr) /* Filled with the UCS4 codepoint represented - * by the Tcl_UniChar string. */ -{ - if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { - *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000; - return 2; - } - *ucs4Ptr = src[0]; - return 1; -} - -const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) { - if (src <= ptr + 1) { - return ptr; - } - if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) { - return src - 2; - } - return src - 1; -} - - - -#endif - /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ Index: generic/tclUtil.c ================================================================== --- generic/tclUtil.c +++ generic/tclUtil.c @@ -1715,19 +1715,19 @@ Tcl_Size pInc = 0, bytesLeft = numTrim; pp = Tcl_UtfPrev(p, bytes); do { pp += pInc; - pInc = TclUtfToUCS4(pp, &ch1); + pInc = Tcl_UtfToUniChar(pp, &ch1); } while (pp + pInc < p); /* * Inner loop: scan trim string for match to current character. */ do { - pInc = TclUtfToUCS4(q, &ch2); + pInc = Tcl_UtfToUniChar(q, &ch2); if (ch1 == ch2) { break; } @@ -1788,20 +1788,20 @@ /* * Outer loop: iterate over string to be trimmed. */ do { - Tcl_Size pInc = TclUtfToUCS4(p, &ch1); + Tcl_Size pInc = Tcl_UtfToUniChar(p, &ch1); const char *q = trim; Tcl_Size bytesLeft = numTrim; /* * Inner loop: scan trim string for match to current character. */ do { - Tcl_Size qInc = TclUtfToUCS4(q, &ch2); + Tcl_Size qInc = Tcl_UtfToUniChar(q, &ch2); if (ch1 == ch2) { break; } @@ -1864,11 +1864,11 @@ /* If we did not trim the whole string, it starts with a character * that we will not trim. Skip over it. */ if (numBytes > 0) { int ch; const char *first = bytes + trimLeft; - bytes += TclUtfToUCS4(first, &ch); + bytes += Tcl_UtfToUniChar(first, &ch); numBytes -= (bytes - first); if (numBytes > 0) { /* When bytes is NUL-terminated, returns * 0 <= trimRight <= numBytes */ @@ -2219,11 +2219,11 @@ if (UCHAR(*pattern) < 0x80) { ch2 = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); } else { - TclUtfToUCS4(pattern, &ch2); + Tcl_UtfToUniChar(pattern, &ch2); if (nocase) { ch2 = Tcl_UniCharToLower(ch2); } } @@ -2235,11 +2235,11 @@ */ if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*str) { - charLen = TclUtfToUCS4(str, &ch1); + charLen = Tcl_UtfToUniChar(str, &ch1); if (ch2==ch1 || ch2==Tcl_UniCharToLower(ch1)) { break; } str += charLen; } @@ -2249,11 +2249,11 @@ * shorter, as the number of bytes you want to compare * each time is non-constant. */ while (*str) { - charLen = TclUtfToUCS4(str, &ch1); + charLen = Tcl_UtfToUniChar(str, &ch1); if (ch2 == ch1) { break; } str += charLen; } @@ -2263,11 +2263,11 @@ return 1; } if (*str == '\0') { return 0; } - str += TclUtfToUCS4(str, &ch1); + str += Tcl_UtfToUniChar(str, &ch1); } } /* * Check for a "?" as the next pattern character. It matches any @@ -2274,11 +2274,11 @@ * single character. */ if (p == '?') { pattern++; - str += TclUtfToUCS4(str, &ch1); + str += Tcl_UtfToUniChar(str, &ch1); continue; } /* * Check for a "[" as the next pattern character. It is followed by a @@ -2293,11 +2293,11 @@ if (UCHAR(*str) < 0x80) { ch1 = (int) (nocase ? tolower(UCHAR(*str)) : UCHAR(*str)); str++; } else { - str += TclUtfToUCS4(str, &ch1); + str += Tcl_UtfToUniChar(str, &ch1); if (nocase) { ch1 = Tcl_UniCharToLower(ch1); } } while (1) { @@ -2307,11 +2307,11 @@ if (UCHAR(*pattern) < 0x80) { startChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += TclUtfToUCS4(pattern, &startChar); + pattern += Tcl_UtfToUniChar(pattern, &startChar); if (nocase) { startChar = Tcl_UniCharToLower(startChar); } } if (*pattern == '-') { @@ -2322,11 +2322,11 @@ if (UCHAR(*pattern) < 0x80) { endChar = (int) (nocase ? tolower(UCHAR(*pattern)) : UCHAR(*pattern)); pattern++; } else { - pattern += TclUtfToUCS4(pattern, &endChar); + pattern += Tcl_UtfToUniChar(pattern, &endChar); if (nocase) { endChar = Tcl_UniCharToLower(endChar); } } if (((startChar <= ch1) && (ch1 <= endChar)) @@ -2370,12 +2370,12 @@ /* * There's no special character. Just make sure that the next bytes of * each string match. */ - str += TclUtfToUCS4(str, &ch1); - pattern += TclUtfToUCS4(pattern, &ch2); + str += Tcl_UtfToUniChar(str, &ch1); + pattern += Tcl_UtfToUniChar(pattern, &ch2); if (nocase) { if (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2)) { return 0; } } else if (ch1 != ch2) { Index: tests/utf.test ================================================================== --- tests/utf.test +++ tests/utf.test @@ -16,21 +16,10 @@ ::tcltest::loadTestedCommands catch [list package require -exact tcl::test [info patchlevel]] source [file join [file dirname [info script]] tcltests.tcl] -testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] -testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] -testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] -testConstraint utf32 [expr {[testConstraint fullutf] - && [string length [format %c 0x10000]] == 1}] - -testConstraint Uesc [expr {"\U0041" eq "A"}] -testConstraint pre388 [expr {"\x741" eq "A"}] -testConstraint pairsTo4bytes [expr {[llength [info commands teststringbytes]] - && [string length [teststringbytes \uD83D\uDCA9]] == 4}] - testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testfindfirst [llength [info commands testfindfirst]] testConstraint testfindlast [llength [info commands testfindlast]] testConstraint testnumutfchars [llength [info commands testnumutfchars]] testConstraint teststringobj [llength [info commands teststringobj]] @@ -57,16 +46,13 @@ expr {[format %c 0x110000] eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring { expr {[format %c -1] eq [testbytestring \xEF\xBF\xBD]} } 1 -test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {fullutf testbytestring} { +test utf-1.7.0 {Tcl_UniCharToUtf: 4 byte sequences} {testbytestring} { expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} } 1 -test utf-1.7.1 {Tcl_UniCharToUtf: 4 byte sequences} {Uesc ucs2 testbytestring} { - expr {"\U014E4E" eq [testbytestring \xF0\x94\xB9\x8E]} -} 0 test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring { expr {"\uD842" eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {"\uDC42" eq [testbytestring \xED\xB1\x82]} @@ -75,17 +61,14 @@ expr {[format %c 0xD842] eq [testbytestring \xED\xA1\x82]} } 1 test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring { expr {[format %c 0xDC42] eq [testbytestring \xED\xB1\x82]} } 1 -test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {pairsTo4bytes testbytestring} { +test utf-1.12 {Tcl_UniCharToUtf: 4 byte sequence, high/low surrogate} {testbytestring} { expr {"\uD842\uDC42" eq [testbytestring \xF0\xA0\xA1\x82]} } 1 -test utf-1.13.0 {Tcl_UniCharToUtf: Invalid surrogate} {Uesc ucs2} { - expr {"\UD842" eq "\uD842"} -} 1 -test utf-1.13.1 {Tcl_UniCharToUtf: Invalid surrogate} {fullutf testbytestring} { +test utf-1.13 {Tcl_UniCharToUtf: Invalid surrogate} {testbytestring} { expr {"\UD842" eq [testbytestring \xEF\xBF\xBD]} } 1 test utf-1.14 {Tcl_UniCharToUtf: surrogate pairs from concat} { set lo \uDE02 return \uD83D$lo @@ -125,26 +108,14 @@ string length [testbytestring \xE2\xA2] } 2 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring { string length [testbytestring \xE4\xB9\x8E] } 1 -test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { - string length [testbytestring \xF0\x90\x80\x80] -} 2 -test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { - string length 𐀀 -} 2 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { +test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} { string length 𐀀 } 1 -test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { - string length [testbytestring \xF4\x8F\xBF\xBF] -} 2 -test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { - string length \U10FFFF -} 2 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { +test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} { string length \U10FFFF } 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring \xF0\x8F\xBF\xBF] } 4 @@ -240,14 +211,11 @@ testutfnext [testbytestring \xA0\x00] } 1 test utf-6.10 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0]G } 1 -test utf-6.11.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\x00] -} 1 -test utf-6.11.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.11 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xA0\x00] } 2 test utf-6.12 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xA0\xD0] } 1 @@ -300,23 +268,17 @@ testutfnext [testbytestring \xE8\xF2] } 1 test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xE8\xF8] } 1 -test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2] -} 1 -test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.30 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\x00] } 1 test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2]G } 1 -test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0] -} 1 -test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.32 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\x00] } 1 test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xD0] } 1 @@ -423,14 +385,11 @@ testutfnext θ  [testbytestring \xF8] } 3 test utf-6.68 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0]G } 1 -test utf-6.69.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0] -} 1 -test utf-6.69.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.69 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xA0] } 4 test utf-6.70 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xD0] } 1 @@ -441,44 +400,26 @@ testutfnext [testbytestring \xF2\xA0\xA0\xF2] } 1 test utf-6.73 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xF8] } 1 -test utf-6.74.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0]G -} 1 -test utf-6.74.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0]G -} 4 -test utf-6.75.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] -} 1 -test utf-6.75.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] -} 4 -test utf-6.76.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] -} 1 -test utf-6.76.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] -} 4 -test utf-6.77.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] -} 1 -test utf-6.77.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] -} 4 -test utf-6.78.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] -} 1 -test utf-6.78.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] -} 4 -test utf-6.79.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] -} 1 -test utf-6.79.1 {Tcl_UtfNext} {testutfnext testbytestring fullutf} { +test utf-6.74 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0]G +} 4 +test utf-6.75 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] +} 4 +test utf-6.76 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xD0] +} 4 +test utf-6.77 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xE8] +} 4 +test utf-6.78 {Tcl_UtfNext} {testutfnext testbytestring} { + testutfnext [testbytestring \xF2\xA0\xA0\xA0\xF2] +} 4 +test utf-6.79 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2\xA0\xA0\xA0G\xF8] } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \x00 } 2 @@ -498,59 +439,35 @@ testutfnext [testbytestring \xE0\xA0\x80] } 3 test utf-6.86 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { testutfnext [testbytestring \xF0\x80\x80\x80] } 1 -test utf-6.87.0 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF0\x90\x80\x80] -} 1 -test utf-6.87.1 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xF0\x90\x80\x80] -} 4 -test utf-6.88.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\x00] -} 1 -test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xA0\xA0\x00] -} 2 -test utf-6.89.0 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \x80\x80\x00] -} 1 -test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \x80\x80\x00] -} 2 -test utf-6.90.0 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF4\x8F\xBF\xBF] -} 1 -test utf-6.90.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring fullutf} { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext testbytestring} { + testutfnext [testbytestring \xF0\x90\x80\x80] +} 4 +test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\x00] +} 2 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x00] +} 2 +test utf-6.90 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { testutfnext [testbytestring \xF4\x8F\xBF\xBF] } 4 test utf-6.91 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext testbytestring} { testutfnext [testbytestring \xF4\x90\x80\x80] } 1 -test utf-6.92.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\xA0] -} 1 -test utf-6.92.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xA0\xA0\xA0] -} 3 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \x80\x80\x80] -} 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \x80\x80\x80] -} 3 -test utf-6.94.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xA0\xA0\xA0\xA0] -} 1 -test utf-6.94.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { - testutfnext [testbytestring \xA0\xA0\xA0\xA0] -} 3 -test utf-6.95.0 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \x80\x80\x80\x80] -} 1 -test utf-6.95.1 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring fullutf} { +test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0] +} 3 +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \x80\x80\x80] +} 3 +test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { + testutfnext [testbytestring \xA0\xA0\xA0\xA0] +} 3 +test utf-6.95 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext testbytestring} { testutfnext [testbytestring \x80\x80\x80\x80] } 3 test utf-7.1 {Tcl_UtfPrev} testutfprev { testutfprev {} @@ -613,26 +530,17 @@ testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 3 } 2 test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0] -} 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0] -} 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 -} 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 -} 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 -} 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0] +} 1 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 +} 1 +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xA0] } 1 @@ -670,26 +578,17 @@ testutfprev A[testbytestring \xF8\xA0\xA0\xA0] 4 } 3 test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0] -} 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0] -} 1 -test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 -} 3 -test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 -} 1 -test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 -} 3 -test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0\xA0] +} 1 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 +} 1 +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { testutfprev Aθ   } 1 @@ -718,14 +617,11 @@ testutfprev [testbytestring A\xA0\xA0\xA0\xF8] 4 } 3 test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev [testbytestring A\xF8\xA0\xA0\xA0] } 4 -test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { - testutfprev [testbytestring A\xF2\xA0\xA0\xA0] -} 4 -test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { +test utf-7.20 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev [testbytestring A\xF2\xA0\xA0\xA0] } 1 test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xE8\xA0\xA0\xA0] } 4 @@ -784,26 +680,17 @@ testutfprev A[testbytestring \xE0\xA0\x80] 3 } 1 test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xE0\xA0\x80] 2 } 1 -test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF0\x90\x80\x80] -} 4 -test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF0\x90\x80\x80] -} 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF0\x90\x80\x80] 4 -} 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF0\x90\x80\x80] 4 -} 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF0\x90\x80\x80] 3 -} 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x90\x80\x80] +} 1 +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF0\x90\x80\x80] 4 +} 1 +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { testutfprev A[testbytestring \xF0\x90\x80\x80] 2 } 1 @@ -826,32 +713,23 @@ testutfprev θ   2 } 0 test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev testbytestring} { testutfprev [testbytestring \xE8\xA0\x00] 2 } 0 -test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] -} 4 -test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] -} 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 -} 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 -} 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 -} 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 -} 1 -test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { - testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2 -} 1 -test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { +test utf-7.48 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] +} 1 +test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 +} 1 +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 +} 1 +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { + testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 2 +} 1 +test utf-7.49 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { testutfprev A[testbytestring \xF4\x90\x80\x80] 4 } 3 @@ -872,137 +750,59 @@ string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index δΉŽΙšΓΏΥƒ 2 } ΓΏ -test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { - string index \uD842 0 -} \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 { - string index \uD842 0 -} \uD842 -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 { +test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} { string index \uD842 0 } \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 } \uDC42 -test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index \uD83D\uDE00G 0 -} \uD83D -test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 0 -} πŸ˜€ -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 0 -} πŸ˜€ -test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index \uD83D\uDE00G 1 -} \uDE00 -test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 1 -} G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 1 -} {} -test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index \uD83D\uDE00G 2 -} G -test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 2 -} {} -test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 2 -} G -test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index πŸ˜€G 0 -} \uFFFD -test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 0 -} πŸ˜€ -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 0 -} πŸ˜€ -test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index πŸ˜€G 1 -} G -test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 1 -} G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 1 -} {} -test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { - string index πŸ˜€G 2 -} {} -test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 { - string index πŸ˜€G 2 -} {} -test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 { - string index πŸ˜€G 2 -} G +test utf-8.7 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 0 +} πŸ˜€ +test utf-8.8 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 1 +} G +test utf-8.9 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 2 +} {} +test utf-8.10 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 0 +} πŸ˜€ +test utf-8.11 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 1 +} G +test utf-8.12 {Tcl_UniCharAtIndex: Emoji} { + string index πŸ˜€G 2 +} {} test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 } abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range δΉŽΙšΓΏΥƒklmnop 1 5 } ΙšΓΏΥƒkl -test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { - string range \uD83D\uDE00G 0 0 -} \uD83D -test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { - string range πŸ˜€G 0 0 -} πŸ˜€ -test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { - string range πŸ˜€G 0 0 -} πŸ˜€ -test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range \uD83D\uDE00G 1 1 -} \uDE00 -test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { - string range πŸ˜€G 1 1 -} G -test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range πŸ˜€G 1 1 -} {} -test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range \uD83D\uDE00G 2 2 -} G -test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { - string range πŸ˜€G 2 2 -} {} -test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range πŸ˜€G 2 2 -} G -test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { - string range πŸ˜€G 0 0 -} \uFFFD -test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { - string range πŸ˜€G 0 0 -} πŸ˜€ -test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { - string range πŸ˜€G 0 0 -} πŸ˜€ -test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range πŸ˜€G 1 1 -} G -test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { - string range πŸ˜€G 1 1 -} G -test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range πŸ˜€G 1 1 -} {} -test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { - string range πŸ˜€G 2 2 -} {} -test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { - string range πŸ˜€G 2 2 -} {} -test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { - string range πŸ˜€G 2 2 -} G +test utf-9.3 {Tcl_UtfAtIndex: index = 0, Emoji} { + string range πŸ˜€G 0 0 +} πŸ˜€ +test utf-9.4 {Tcl_UtfAtIndex: index > 0, Emoji} { + string range πŸ˜€G 1 1 +} G +test utf-9.5 {Tcl_UtfAtIndex: index > 0, Emoji} { + string range πŸ˜€G 2 2 +} {} +test utf-9.6 {Tcl_UtfAtIndex: index = 0, Emoji} { + string range πŸ˜€G 0 0 +} πŸ˜€ +test utf-9.7 {Tcl_UtfAtIndex: index > 0, Emoji} { + string range πŸ˜€G 1 1 +} G +test utf-9.8 {Tcl_UtfAtIndex: index > 0, Emoji} { + string range πŸ˜€G 2 2 +} {} test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { set x \n } { } @@ -1016,14 +816,14 @@ expr {"\u4E2k" eq "[testbytestring \xD3\xA2]k"} } 1 test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} testbytestring { expr {"\u4E216" eq "[testbytestring \xE4\xB8\xA1]6"} } 1 -test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {fullutf testbytestring} { +test utf-10.6 {Tcl_UtfBackslash: stops after 5 hex chars} {testbytestring} { expr {"\U1E2165" eq "[testbytestring \xF0\x9E\x88\x96]5"} } 1 -test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {fullutf testbytestring} { +test utf-10.7 {Tcl_UtfBackslash: stops after 6 hex chars} {testbytestring} { expr {"\U10E2165" eq "[testbytestring \xF4\x8E\x88\x96]5"} } 1 proc bsCheck {char num {constraints {}}} { global errNum @@ -1060,39 +860,37 @@ bsCheck b\0 98 bsCheck \x 120 bsCheck \xa 10 bsCheck \xA 10 bsCheck \x41 65 -bsCheck \x541 65 pre388 ;# == \x41 -bsCheck \x541 84 !pre388 ;# == \x54 1 +bsCheck \x541 84 bsCheck \u 117 bsCheck \uk 117 bsCheck \u41 65 bsCheck \ua 10 bsCheck \uA 10 bsCheck \340 224 bsCheck \uA1 161 bsCheck \u4E21 20001 -bsCheck \741 225 pre388 ;# == \341 -bsCheck \741 60 !pre388 ;# == \74 1 +bsCheck \741 60 bsCheck \U 85 bsCheck \Uk 85 -bsCheck \U41 65 Uesc -bsCheck \Ua 10 Uesc -bsCheck \UA 10 Uesc -bsCheck \UA1 161 Uesc -bsCheck \U4E21 20001 Uesc -bsCheck \U004E21 20001 Uesc -bsCheck \U00004E21 20001 Uesc -bsCheck \U0000004E21 78 Uesc -bsCheck \U00110000 69632 fullutf -bsCheck \U01100000 69632 fullutf -bsCheck \U11000000 69632 fullutf -bsCheck \U0010FFFF 1114111 fullutf -bsCheck \U010FFFF0 1114111 fullutf -bsCheck \U10FFFF00 1114111 fullutf -bsCheck \UFFFFFFFF 1048575 fullutf +bsCheck \U41 65 +bsCheck \Ua 10 +bsCheck \UA 10 +bsCheck \UA1 161 +bsCheck \U4E21 20001 +bsCheck \U004E21 20001 +bsCheck \U00004E21 20001 +bsCheck \U0000004E21 78 +bsCheck \U00110000 69632 +bsCheck \U01100000 69632 +bsCheck \U11000000 69632 +bsCheck \U0010FFFF 1114111 +bsCheck \U010FFFF0 1114111 +bsCheck \U10FFFF00 1114111 +bsCheck \UFFFFFFFF 1048575 test utf-11.1 {Tcl_UtfToUpper} { string toupper {} } {} test utf-11.2 {Tcl_UtfToUpper} { @@ -1105,14 +903,14 @@ string toupper Η£gh } Η’GH test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper აᲐ } ᲐᲐ -test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} fullutf { +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} { string toupper 𐐨 } 𐐀 -test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} fullutf { +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} { string toupper 𐐨 } 𐐀 test utf-11.8 {Tcl_UtfToUpper low/high surrogate)} { string toupper \uDC24\uD824 } \uDC24\uD824 @@ -1133,14 +931,14 @@ string tolower აᲐ } აა test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 -test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} fullutf { +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} { string tolower 𐐀 } 𐐨 -test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} fullutf { +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} { string tolower 𐐀 } 𐐨 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1161,14 +959,14 @@ string totitle Აა } Აა test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 -test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} fullutf { +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} { string totitle 𐐨𐐀 } 𐐀𐐨 -test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} fullutf { +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} { string totitle 𐐨𐐀 } 𐐀𐐨 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b @@ -1224,14 +1022,14 @@ list [regexp \\d abc456def foo] $foo } -cleanup { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} utf32 { +test utf-20.1 {TclUniCharNcmp} { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] string range $one 0 0 string range $two 0 0 @@ -1354,14 +1152,14 @@ variable count 1 UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32 -UniCharCaseCmpTest < \uFFFF \U10000 utf32 -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32 -UniCharCaseCmpTest > \U10000 \uFFFF utf32 +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] +UniCharCaseCmpTest < \uFFFF \U10000 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF +UniCharCaseCmpTest > \U10000 \uFFFF test utf-26.1 {Tcl_UniCharDString} -setup { testobj freeallvars } -constraints {teststringobj testbytestring} -cleanup { Index: win/makefile.vc ================================================================== --- win/makefile.vc +++ win/makefile.vc @@ -50,11 +50,11 @@ # SDK (not expressly needed), run setenv.bat after # vcvars32.bat according to the instructions for it. This can also # turn on the 64-bit compiler, if your SDK has it. # # Basic macros and options usable on the commandline (see rules.vc for more info): -# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none +# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,none # Sets special options for the core. The default is for none. # Any combination of the above may be used (comma separated). # 'none' will over-ride everything to nothing. # # noembed = Without this option, the Tcl core library scripts @@ -78,11 +78,10 @@ # time64bit = Forces a build using 64-bit time_t for 32-bit build # (CRT library should support this). # unchecked = Allows a symbols build to not use the debug # enabled runtime (msvcrt.dll not msvcrtd.dll # or libcmt.lib not libcmtd.lib). -# utf16 = Forces a build using UTF-16 representation internally. # # STATS=compdbg,memdbg,none # Sets optional memory and bytecode compiler debugging code added # to the core. The default is for none. Any combination of the # above may be used (comma separated). 'none' will over-ride Index: win/rules.vc ================================================================== --- win/rules.vc +++ win/rules.vc @@ -814,11 +814,10 @@ # USE_STUBS - 1 -> compile to use stubs interfaces, 0 -> direct linking # CONFIG_CHECK - 1 -> check current build configuration against Tcl # configuration (ignored for Tcl itself) # _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build # (CRT library should support this, not needed for Tcl 9.x) -# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended). # Further, LINKERFLAGS are modified based on above. # Default values for all the above STATIC_BUILD = 0 TCL_THREADS = 1 @@ -885,15 +884,10 @@ !if $(TCL_MAJOR_VERSION) == 8 !if [nmakehlp -f $(OPTS) "time64bit"] !message *** Force 64-bit time_t _USE_64BIT_TIME_T = 1 !endif - -!if [nmakehlp -f $(OPTS) "utf16"] -!message *** Force UTF-16 internally -TCL_UTF_MAX = 3 -!endif !endif # Yes, it's weird that the "symbols" option controls DEBUG and # the "pdbs" option controls SYMBOLS. That's historical. !if [nmakehlp -f $(OPTS) "symbols"] @@ -1449,13 +1443,10 @@ !endif # _ATL_XP_TARGETING - Newer SDK's need this to build for XP COMPILERFLAGS = /D_ATL_XP_TARGETING !endif -!if "$(TCL_UTF_MAX)" == "3" -OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3 -!endif !if "$(TCL_BUILD_FOR)" == "8" OPTDEFINES = $(OPTDEFINES) /DTCL_MAJOR_VERSION=8 !endif # Like the TEA system only set this non empty for non-Tk extensions