Index: .github/workflows/linux-build.yml ================================================================== --- .github/workflows/linux-build.yml +++ .github/workflows/linux-build.yml @@ -5,11 +5,11 @@ runs-on: ubuntu-20.04 strategy: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" - "--enable-symbols=mem" - "--enable-symbols=all" Index: .github/workflows/win-build.yml ================================================================== --- .github/workflows/win-build.yml +++ .github/workflows/win-build.yml @@ -11,11 +11,11 @@ working-directory: win strategy: matrix: cfgopt: - "" - - "OPTS=utfmax" + - "OPTS=utf16" - "CHECKS=nodep" - "OPTS=static" - "OPTS=symbols" - "OPTS=symbols STATS=compdbg,memdbg" # Using powershell means we need to explicitly stop on failure @@ -50,11 +50,11 @@ working-directory: win strategy: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" - "--enable-symbols=mem" - "--enable-symbols=all" Index: generic/tcl.decls ================================================================== --- generic/tcl.decls +++ generic/tcl.decls @@ -1246,11 +1246,11 @@ } declare 352 { int Tcl_Char16Len(const unsigned short *uniStr) } declare 353 {deprecated {Use Tcl_UtfNcmp}} { - int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 354 { char *Tcl_Char16ToUtfDString(const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr) @@ -1336,30 +1336,30 @@ } declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } declare 378 { - Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars) + Tcl_Obj *Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars) } declare 379 { - void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const unsigned short *unicode, int numChars) } declare 380 { int Tcl_GetCharLength(Tcl_Obj *objPtr) } declare 381 { int Tcl_GetUniChar(Tcl_Obj *objPtr, int index) } declare 382 {deprecated {No longer in use, changed to macro}} { - Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr) + unsigned short *Tcl_GetUnicode(Tcl_Obj *objPtr) } declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } -declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { - void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, +declare 384 { + void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length) } declare 385 { int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj) @@ -1481,16 +1481,16 @@ } declare 418 { int Tcl_IsChannelExisting(const char *channelName) } declare 419 {deprecated {Use Tcl_UtfNcasecmp}} { - int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 420 {deprecated {Use Tcl_StringCaseMatch}} { - int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase) + int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase) } declare 421 { Tcl_HashEntry *Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key) } declare 422 { @@ -1539,11 +1539,11 @@ Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel) } # introduced in 8.4a3 declare 434 { - Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) + unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } # TIP#15 (math function introspection) dkf declare 435 {deprecated {}} { int Tcl_GetMathFuncInfo(Tcl_Interp *interp, const char *name, @@ -2415,11 +2415,11 @@ # TIP #481 declare 651 { char *TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 652 { - Tcl_UniChar *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) + unsigned short *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 653 { unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *numBytesPtr) } @@ -2452,10 +2452,25 @@ # TIP #617 declare 668 { int Tcl_UniCharLen(const int *uniStr) } +declare 669 { + int TclNumUtfChars(const char *src, int length) +} +declare 670 { + int TclGetCharLength(Tcl_Obj *objPtr) +} +declare 671 { + const char *TclUtfAtIndex(const char *src, int index) +} +declare 672 { + Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, int first, int last) +} +declare 673 { + int TclGetUniChar(Tcl_Obj *objPtr, int index) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # ############################################################################## Index: generic/tcl.h ================================================================== --- generic/tcl.h +++ generic/tcl.h @@ -2140,11 +2140,15 @@ * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UTF-16 mode * is the default and recommended mode. */ #ifndef TCL_UTF_MAX -#define TCL_UTF_MAX 3 +# ifdef BUILD_tcl +# define TCL_UTF_MAX 4 +# else +# define TCL_UTF_MAX 3 +# endif #endif /* * This represents a Unicode character. Any changes to this should also be * reflected in regcustom.h. Index: generic/tclBinary.c ================================================================== --- generic/tclBinary.c +++ generic/tclBinary.c @@ -432,11 +432,11 @@ const char *nonbyte; int ucs4; irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "expected byte sequence but character %d " "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); @@ -471,11 +471,11 @@ const char *nonbyte; int ucs4; irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "expected byte sequence but character %d " "was '%1s' (U+%06X)", baPtr->bad, nonbyte, ucs4)); Index: generic/tclCmdMZ.c ================================================================== --- generic/tclCmdMZ.c +++ generic/tclCmdMZ.c @@ -254,11 +254,11 @@ * the termination test for -all matches. Do this before getting the * regexp to avoid shimmering problems. */ objPtr = objv[1]; - stringLength = Tcl_GetCharLength(objPtr); + stringLength = TclGetCharLength(objPtr); if (startIndex) { TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { @@ -308,11 +308,11 @@ if (offset == 0) { eflags = 0; } else if (offset > stringLength) { eflags = TCL_REG_NOTBOL; - } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') { + } else if (TclGetUniChar(objPtr, offset-1) == '\n') { eflags = 0; } else { eflags = TCL_REG_NOTBOL; } @@ -393,11 +393,11 @@ TclNewIndexObj(objs[1], end); newPtr = Tcl_NewListObj(2, objs); } else { if ((i <= info.nsubs) && (info.matches[i].end > 0)) { - newPtr = Tcl_GetRange(objPtr, + newPtr = TclGetRange(objPtr, offset + info.matches[i].start, offset + info.matches[i].end - 1); } else { TclNewObj(newPtr); } @@ -579,11 +579,11 @@ objc -= idx; objv += idx; if (startIndex) { - int stringLength = Tcl_GetCharLength(objv[1]); + int stringLength = TclGetCharLength(objv[1]); TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { offset = 0; @@ -602,15 +602,15 @@ int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long); Tcl_UniChar *p; numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); - strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp; - wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); - wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); - wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wsrc = TclGetUnicodeFromObj_(objv[0], &slen); + wstring = TclGetUnicodeFromObj_(objv[1], &wlen); + wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen); wend = wstring + wlen - (slen ? slen - 1 : 0); result = TCL_OK; if (slen == 0) { /* @@ -617,15 +617,15 @@ * regsub behavior for "" matches between each character. 'string * map' skips the "" case. */ if (wstring < wend) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); for (; wstring < wend; wstring++) { - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); - Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wstring, 1); numMatches++; } wlen = 0; } } else { @@ -634,22 +634,22 @@ if ((*wstring == *wsrc || (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) && (slen==1 || (strCmpFn(wstring, wsrc, (unsigned long) slen) == 0))) { if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); } if (p != wstring) { - Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + TclAppendUnicodeToObj(resultPtr, p, wstring - p); p = wstring + slen; } else { p += slen; } wstring = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); numMatches++; } } if (numMatches) { wlen = wfirstChar + wlen - p; @@ -697,18 +697,18 @@ if (objv[1] == objv[0]) { objPtr = Tcl_DuplicateObj(objv[1]); } else { objPtr = objv[1]; } - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); if (objv[2] == objv[0]) { subPtr = Tcl_DuplicateObj(objv[2]); } else { subPtr = objv[2]; } if (!command) { - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen); } result = TCL_OK; /* @@ -740,19 +740,19 @@ } if (match == 0) { break; } if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); if (offset > 0) { /* * Copy the initial portion of the string in if an offset was * specified. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + TclAppendUnicodeToObj(resultPtr, wstring, offset); } } numMatches++; /* @@ -761,11 +761,11 @@ */ Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); + TclAppendUnicodeToObj(resultPtr, wstring + offset, start); /* * In command-prefix mode, the substitutions are added as quoted * arguments to the subSpec to form a command, that is then executed * and the result used as the string to substitute in. Actually, @@ -783,11 +783,11 @@ for (idx = 0 ; idx <= info.nsubs ; idx++) { subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - args[idx + numParts] = Tcl_NewUnicodeObj( + args[idx + numParts] = TclNewUnicodeObj( wstring + offset + subStart, subEnd - subStart); } else { TclNewObj(args[idx + numParts]); } Tcl_IncrRefCount(args[idx + numParts]); @@ -824,11 +824,11 @@ /* * Refetch the unicode, in case the representation was smashed by * the user code. */ - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); offset += end; if (end == 0 || start == end) { /* * Always consume at least one character of the input string @@ -836,11 +836,11 @@ * technically matched the empty string; we must not match * again at the same spot. */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } if (all) { continue; @@ -865,11 +865,11 @@ ch = wsrc[1]; if ((ch >= '0') && (ch <= '9')) { idx = ch - '0'; } else if ((ch == '\\') || (ch == '&')) { *wsrc = ch; - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar + 1); *wsrc = '\\'; wfirstChar = wsrc + 2; wsrc++; continue; @@ -879,19 +879,19 @@ } else { continue; } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (idx <= info.nsubs) { subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } } if (*wsrc == '\\') { @@ -899,21 +899,21 @@ } wfirstChar = wsrc + 1; } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (end == 0) { /* * Always consume at least one character of the input string in * order to prevent infinite loops. */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } else { offset += end; if (start == end) { @@ -921,11 +921,11 @@ * We matched an empty string, which means we must go forward * one more step so we don't match again at the same spot. */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } } if (!all) { @@ -946,11 +946,11 @@ */ resultPtr = objv[1]; Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (objc == 4) { if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, TCL_LEAVE_ERR_MSG) == NULL) { result = TCL_ERROR; @@ -1314,11 +1314,11 @@ "needleString haystackString ?startIndex?"); return TCL_ERROR; } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; } } @@ -1358,11 +1358,11 @@ "needleString haystackString ?lastIndex?"); return TCL_ERROR; } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) { return TCL_ERROR; } } @@ -1404,17 +1404,17 @@ /* * Get the char length to calculate what 'end' means. */ - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } if ((index >= 0) && (index < length)) { - int ch = Tcl_GetUniChar(objv[1], index); + int ch = TclGetUniChar(objv[1], index); if (ch == -1) { return TCL_OK; } @@ -1472,11 +1472,11 @@ if (objc != 4) { Tcl_WrongNumArgs(interp, 1, objv, "string index insertString"); return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length, &index) != TCL_OK) { return TCL_ERROR; } if (index < 0) { @@ -1667,11 +1667,11 @@ while (TclIsSpaceProc(*p)) { p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } } } @@ -1847,11 +1847,11 @@ while (TclIsSpaceProcM(*p)) { p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } } } @@ -2058,27 +2058,27 @@ sourceObj = Tcl_DuplicateObj(objv[objc-1]); copySource = 1; } else { sourceObj = objv[objc-1]; } - ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1); if (length1 == 0) { /* * Empty input string, just stop now. */ goto done; } end = ustring1 + length1; - strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + strCmpFn = (nocase ? TclUniCharNcasecmp : TclUniCharNcmp); /* * Force result to be Unicode */ - resultPtr = Tcl_NewUnicodeObj(ustring1, 0); + resultPtr = TclNewUnicodeObj(ustring1, 0); if (mapElemc == 2) { /* * Special case for one map pair which avoids the extra for loop and * extra calls to get Unicode data. The algorithm is otherwise @@ -2087,35 +2087,35 @@ */ int mapLen, u2lc; Tcl_UniChar *mapString; - ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2); p = ustring1; if ((length2 > length1) || (length2 == 0)) { /* * Match string is either longer than input or empty. */ ustring1 = end; } else { - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen); u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); for (; ustring1 < end; ustring1++) { if (((*ustring1 == *ustring2) || (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && (length2==1 || strCmpFn(ustring1, ustring2, (unsigned long) length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + TclAppendUnicodeToObj(resultPtr, mapString, mapLen); } } } } else { Tcl_UniChar **mapStrings; @@ -2132,11 +2132,11 @@ mapLens = (int *)TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); if (nocase) { u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { - mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index], mapLens+index); if (nocase && ((index % 2) == 0)) { u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); } } @@ -2156,11 +2156,11 @@ if (p != ustring1) { /* * Put the skipped chars onto the result first. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } @@ -2172,11 +2172,11 @@ /* * Append the map value to the unicode string. */ - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, mapStrings[index+1], mapLens[index+1]); break; } } } @@ -2189,11 +2189,11 @@ if (p != ustring1) { /* * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(resultPtr, p, ustring1 - p); } Tcl_SetObjResult(interp, resultPtr); done: if (mapWithDict) { TclStackFree(interp, mapElemv); @@ -2291,19 +2291,19 @@ /* * Get the length in actual characters; Then reduce it by one because * 'end' refers to the last character, not one past it. */ - length = Tcl_GetCharLength(objv[1]) - 1; + length = TclGetCharLength(objv[1]) - 1; if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { return TCL_ERROR; } if (last >= 0) { - Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); + Tcl_SetObjResult(interp, TclGetRange(objv[1], first, last)); } return TCL_OK; } /* @@ -2392,11 +2392,11 @@ if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?"); return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); end = length - 1; if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK) { return TCL_ERROR; @@ -2504,11 +2504,11 @@ if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } if (index >= length) { index = length - 1; @@ -2574,11 +2574,11 @@ if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } if (index < 0) { index = 0; @@ -2878,11 +2878,11 @@ if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "string"); return TCL_ERROR; } - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(Tcl_GetCharLength(objv[1]))); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(TclGetCharLength(objv[1]))); return TCL_OK; } /* *---------------------------------------------------------------------- @@ -2952,12 +2952,12 @@ Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToLower(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); @@ -3037,12 +3037,12 @@ Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToUpper(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); @@ -3122,12 +3122,12 @@ Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToTitle(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); @@ -3788,11 +3788,11 @@ } if (matchVarObj != NULL) { Tcl_Obj *substringObj; - substringObj = Tcl_GetRange(stringObj, + substringObj = TclGetRange(stringObj, info.matches[j].start, info.matches[j].end-1); /* * Never fails; the object is always clean at this point. */ Index: generic/tclCompCmdsSZ.c ================================================================== --- generic/tclCompCmdsSZ.c +++ generic/tclCompCmdsSZ.c @@ -888,11 +888,11 @@ * something with backslashes). Just push the actual character (not * byte) length. */ char buf[TCL_INTEGER_SPACE]; - int len = Tcl_GetCharLength(objPtr); + int len = TclGetCharLength(objPtr); len = sprintf(buf, "%d", len); PushLiteral(envPtr, buf, len); } else { SetLineInformation(1); Index: generic/tclDecls.h ================================================================== --- generic/tclDecls.h +++ generic/tclDecls.h @@ -1064,12 +1064,12 @@ EXTERN int Tcl_UniCharIsWordChar(int ch); /* 352 */ EXTERN int Tcl_Char16Len(const unsigned short *uniStr); /* 353 */ TCL_DEPRECATED("Use Tcl_UtfNcmp") -int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 354 */ EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 355 */ @@ -1140,28 +1140,27 @@ int offset, int nmatches, int flags); /* 377 */ EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, +EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars); /* 379 */ EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int numChars); + const unsigned short *unicode, int numChars); /* 380 */ EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr); /* 381 */ EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, int index); /* 382 */ TCL_DEPRECATED("No longer in use, changed to macro") -Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr); +unsigned short * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ -TCL_DEPRECATED("Use Tcl_AppendStringsToObj") -void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int length); +EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, + const unsigned short *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 386 */ EXTERN void Tcl_SetNotifier( @@ -1254,17 +1253,17 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel); /* 418 */ EXTERN int Tcl_IsChannelExisting(const char *channelName); /* 419 */ TCL_DEPRECATED("Use Tcl_UtfNcasecmp") -int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcasecmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 420 */ TCL_DEPRECATED("Use Tcl_StringCaseMatch") -int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase); +int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase); /* 421 */ EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key); /* 422 */ EXTERN Tcl_HashEntry * Tcl_CreateHashEntry(Tcl_HashTable *tablePtr, @@ -1302,11 +1301,11 @@ /* 432 */ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") int Tcl_GetMathFuncInfo(Tcl_Interp *interp, const char *name, int *numArgsPtr, @@ -1928,11 +1927,11 @@ Tcl_Obj *objPtr, size_t *numBytesPtr); /* 651 */ EXTERN char * TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ -EXTERN Tcl_UniChar * TclGetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */ EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *numBytesPtr); /* 654 */ @@ -1961,10 +1960,20 @@ /* Slot 665 is reserved */ /* Slot 666 is reserved */ /* Slot 667 is reserved */ /* 668 */ EXTERN int Tcl_UniCharLen(const int *uniStr); +/* 669 */ +EXTERN int TclNumUtfChars(const char *src, int length); +/* 670 */ +EXTERN int TclGetCharLength(Tcl_Obj *objPtr); +/* 671 */ +EXTERN const char * TclUtfAtIndex(const char *src, int index); +/* 672 */ +EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, int first, int last); +/* 673 */ +EXTERN int TclGetUniChar(Tcl_Obj *objPtr, int index); typedef struct { const struct TclPlatStubs *tclPlatStubs; const struct TclIntStubs *tclIntStubs; const struct TclIntPlatStubs *tclIntPlatStubs; @@ -2349,11 +2358,11 @@ int (*tcl_UniCharIsLower) (int ch); /* 348 */ int (*tcl_UniCharIsSpace) (int ch); /* 349 */ int (*tcl_UniCharIsUpper) (int ch); /* 350 */ int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ int (*tcl_Char16Len) (const unsigned short *uniStr); /* 352 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 353 */ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ TCL_DEPRECATED_API("Use Tcl_EvalTokensStandard") Tcl_Obj * (*tcl_EvalTokens) (Tcl_Interp *interp, Tcl_Token *tokenPtr, int count); /* 357 */ void (*tcl_FreeParse) (Tcl_Parse *parsePtr); /* 358 */ @@ -2374,17 +2383,17 @@ int (*tcl_UniCharIsGraph) (int ch); /* 373 */ int (*tcl_UniCharIsPrint) (int ch); /* 374 */ int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ + Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned short *unicode, int numChars); /* 378 */ + void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ - TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ + TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ int (*tcl_GetChannelNames) (Tcl_Interp *interp); /* 388 */ int (*tcl_GetChannelNamesEx) (Tcl_Interp *interp, const char *pattern); /* 389 */ @@ -2415,12 +2424,12 @@ int (*tcl_IsChannelRegistered) (Tcl_Interp *interp, Tcl_Channel channel); /* 414 */ void (*tcl_CutChannel) (Tcl_Channel channel); /* 415 */ void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */ void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */ int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ - TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 419 */ + TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const unsigned short *uniStr, const unsigned short *uniPattern, int nocase); /* 420 */ Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */ Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */ void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */ void (*tcl_InitObjHashTable) (Tcl_HashTable *tablePtr); /* 424 */ ClientData (*tcl_CommandTraceInfo) (Tcl_Interp *interp, const char *varName, int flags, Tcl_CommandTraceProc *procPtr, ClientData prevClientData); /* 425 */ @@ -2430,11 +2439,11 @@ char * (*tcl_AttemptDbCkalloc) (unsigned int size, const char *file, int line); /* 429 */ char * (*tcl_AttemptRealloc) (char *ptr, unsigned int size); /* 430 */ char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ + unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ int (*tcl_DetachChannel) (Tcl_Interp *interp, Tcl_Channel channel); /* 438 */ int (*tcl_IsStandardChannel) (Tcl_Channel channel); /* 439 */ @@ -2648,11 +2657,11 @@ char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 647 */ int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 648 */ unsigned char * (*tclGetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int *numBytesPtr); /* 649 */ unsigned char * (*tcl_GetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, size_t *numBytesPtr); /* 650 */ char * (*tclGetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */ - Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ + unsigned short * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */ int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */ @@ -2665,10 +2674,15 @@ void (*reserved664)(void); void (*reserved665)(void); void (*reserved666)(void); void (*reserved667)(void); int (*tcl_UniCharLen) (const int *uniStr); /* 668 */ + int (*tclNumUtfChars) (const char *src, int length); /* 669 */ + int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */ + const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */ + Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, int first, int last); /* 672 */ + int (*tclGetUniChar) (Tcl_Obj *objPtr, int index); /* 673 */ } TclStubs; extern const TclStubs *tclStubsPtr; #ifdef __cplusplus @@ -4026,10 +4040,20 @@ /* Slot 665 is reserved */ /* Slot 666 is reserved */ /* Slot 667 is reserved */ #define Tcl_UniCharLen \ (tclStubsPtr->tcl_UniCharLen) /* 668 */ +#define TclNumUtfChars \ + (tclStubsPtr->tclNumUtfChars) /* 669 */ +#define TclGetCharLength \ + (tclStubsPtr->tclGetCharLength) /* 670 */ +#define TclUtfAtIndex \ + (tclStubsPtr->tclUtfAtIndex) /* 671 */ +#define TclGetRange \ + (tclStubsPtr->tclGetRange) /* 672 */ +#define TclGetUniChar \ + (tclStubsPtr->tclGetUniChar) /* 673 */ #endif /* defined(USE_TCL_STUBS) */ /* !END!: Do not edit above this line. */ Index: generic/tclEncoding.c ================================================================== --- generic/tclEncoding.c +++ generic/tclEncoding.c @@ -1336,11 +1336,11 @@ flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (*dstCharsPtr <= maxChars) { break; } - dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); + dstLen = TclUtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); *statePtr = savedState; } while (1); if (!noTerminate) { /* ...and then append it */ Index: generic/tclExecute.c ================================================================== --- generic/tclExecute.c +++ generic/tclExecute.c @@ -5242,11 +5242,11 @@ (match < 0 ? -1 : match > 0 ? 1 : 0))); JUMP_PEEPHOLE_F(match, 1, 2); case INST_STR_LEN: valuePtr = OBJ_AT_TOS; - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TclNewIntObj(objResultPtr, length); TRACE(("\"%.20s\" => %d\n", O2S(valuePtr), length)); NEXT_INST_F(1, 1, 1); case INST_STR_UPPER: @@ -5308,11 +5308,11 @@ /* * Get char length to calulate what 'end' means. */ - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, value2Ptr, length-1, &index)!=TCL_OK) { CACHE_STACK_INFO(); TRACE_ERROR(interp); goto gotError; @@ -5327,11 +5327,11 @@ } else if (valuePtr->bytes && length == valuePtr->length) { objResultPtr = Tcl_NewStringObj((const char *) valuePtr->bytes+index, 1); } else { char buf[4] = ""; - int ch = Tcl_GetUniChar(valuePtr, index); + int ch = TclGetUniChar(valuePtr, index); /* * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1) * but creating the object as a string seems to be faster in * practical use. @@ -5351,11 +5351,11 @@ NEXT_INST_F(1, 2, 1); case INST_STR_RANGE: TRACE(("\"%.20s\" %.20s %.20s =>", O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS))); - length = Tcl_GetCharLength(OBJ_AT_DEPTH(2)) - 1; + length = TclGetCharLength(OBJ_AT_DEPTH(2)) - 1; DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, &fromIdx) != TCL_OK) { CACHE_STACK_INFO(); @@ -5371,20 +5371,20 @@ CACHE_STACK_INFO(); if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); + objResultPtr = TclGetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); } TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); NEXT_INST_V(1, 3, 1); case INST_STR_RANGE_IMM: valuePtr = OBJ_AT_TOS; fromIdx = TclGetInt4AtPtr(pc+1); toIdx = TclGetInt4AtPtr(pc+5); - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TRACE(("\"%.20s\" %d %d => ", O2S(valuePtr), fromIdx, toIdx)); /* Every range of an empty value is an empty value */ if (length == 0) { TRACE_APPEND(("\n")); @@ -5412,11 +5412,11 @@ } fromIdx = TclIndexDecode(fromIdx, length - 1); if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx); + objResultPtr = TclGetRange(valuePtr, fromIdx, toIdx); } } TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); NEXT_INST_F(9, 1, 1); @@ -5426,11 +5426,11 @@ Tcl_Obj *value3Ptr; case INST_STR_REPLACE: value3Ptr = POP_OBJECT(); valuePtr = OBJ_AT_DEPTH(2); - endIdx = Tcl_GetCharLength(valuePtr) - 1; + endIdx = TclGetCharLength(valuePtr) - 1; TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr))); DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, endIdx, &fromIdx) != TCL_OK @@ -5493,16 +5493,16 @@ goto doneStringMap; } else if (valuePtr == value2Ptr) { objResultPtr = value3Ptr; goto doneStringMap; } - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); if (length == 0) { objResultPtr = valuePtr; goto doneStringMap; } - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); if (length2 > length || length2 == 0) { objResultPtr = valuePtr; goto doneStringMap; } else if (length2 == length) { if (memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length)) { @@ -5510,38 +5510,38 @@ } else { objResultPtr = value3Ptr; } goto doneStringMap; } - ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3); + ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3); - objResultPtr = Tcl_NewUnicodeObj(ustring1, 0); + objResultPtr = TclNewUnicodeObj(ustring1, 0); p = ustring1; end = ustring1 + length; for (; ustring1 < end; ustring1++) { if ((*ustring1 == *ustring2) && /* Fix bug [69218ab7b]: restrict max compare length. */ (end-ustring1 >= length2) && (length2==1 || memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3); + TclAppendUnicodeToObj(objResultPtr, ustring3, length3); } } if (p != ustring1) { /* * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p); } doneStringMap: TRACE_WITH_OBJ(("%.20s %.20s %.20s => ", O2S(value2Ptr), O2S(value3Ptr), O2S(valuePtr)), objResultPtr); NEXT_INST_V(1, 3, 1); @@ -5563,11 +5563,11 @@ case INST_STR_CLASS: opnd = TclGetInt1AtPtr(pc+1); valuePtr = OBJ_AT_TOS; TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, O2S(valuePtr))); - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); match = 1; if (length > 0) { int ch; end = ustring1 + length; for (p=ustring1 ; pflags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; continue; } Index: generic/tclInt.h ================================================================== --- generic/tclInt.h +++ generic/tclInt.h @@ -2769,10 +2769,11 @@ MODULE_SCOPE const Tcl_ObjType tclIntType; MODULE_SCOPE const Tcl_ObjType tclListType; MODULE_SCOPE const Tcl_ObjType tclDictType; MODULE_SCOPE const Tcl_ObjType tclProcBodyType; MODULE_SCOPE const Tcl_ObjType tclStringType; +MODULE_SCOPE const Tcl_ObjType tclUniCharStringType; MODULE_SCOPE const Tcl_ObjType tclEnsembleCmdType; MODULE_SCOPE const Tcl_ObjType tclRegexpType; MODULE_SCOPE Tcl_ObjType tclCmdNameType; /* @@ -3319,10 +3320,48 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, const char *msg, int length); /* Tip 430 */ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); + +#if TCL_UTF_MAX > 3 + MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); + MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); + MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); + MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); + MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); + MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); +# undef Tcl_NumUtfChars +# define Tcl_NumUtfChars TclNumUtfChars +# undef Tcl_GetCharLength +# define Tcl_GetCharLength TclGetCharLength +# undef Tcl_UtfAtIndex +# define Tcl_UtfAtIndex TclUtfAtIndex +# undef Tcl_GetRange +# define Tcl_GetRange TclGetRange +# undef Tcl_GetUniChar +# define Tcl_GetUniChar TclGetUniChar +#else +# define tclUniCharStringType tclStringType +# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj +# define TclNewUnicodeObj Tcl_NewUnicodeObj +# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj +# define TclUniCharNcasecmp Tcl_UniCharNcasecmp +# define TclUniCharCaseMatch Tcl_UniCharCaseMatch +# define TclUniCharNcmp Tcl_UniCharNcmp +# undef TclNumUtfChars +# define TclNumUtfChars Tcl_NumUtfChars +# undef TclGetCharLength +# define TclGetCharLength Tcl_GetCharLength +# undef TclUtfAtIndex +# define TclUtfAtIndex Tcl_UtfAtIndex +# undef TclGetRange +# define TclGetRange Tcl_GetRange +# undef TclGetUniChar +# define TclGetUniChar Tcl_GetUniChar +#endif + /* * Many parsing tasks need a common definition of whitespace. * Use this routine and macro to achieve that and place * optimization (fragile on changes) in one place. @@ -4723,12 +4762,12 @@ (((UCHAR(*(str))) < 0x80) ? \ ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToUniChar(str, chPtr)) #else #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0x80) ? \ - ((*(chPtr) = (unsigned char) *(str)), 1) \ + (((UCHAR(*(str))) < 0x80) ? \ + ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToChar16(str, chPtr)) #endif /* *---------------------------------------------------------------- @@ -4740,18 +4779,18 @@ * MODULE_SCOPE void TclNumUtfChars(int numChars, const char *bytes, * int numBytes); *---------------------------------------------------------------- */ -#define TclNumUtfChars(numChars, bytes, numBytes) \ +#define TclNumUtfCharsM(numChars, bytes, numBytes) \ do { \ int _count, _i = (numBytes); \ unsigned char *_str = (unsigned char *) (bytes); \ while (_i && (*_str < 0xC0)) { _i--; _str++; } \ _count = (numBytes) - _i; \ if (_i) { \ - _count += Tcl_NumUtfChars((bytes) + _count, _i); \ + _count += TclNumUtfChars((bytes) + _count, _i); \ } \ (numChars) = _count; \ } while (0); /* @@ -4776,28 +4815,10 @@ ((objPtr)->typePtr == (type)) #define TclFetchInternalRep(objPtr, type) \ (TclHasInternalRep((objPtr), (type)) ? &((objPtr)->internalRep) : NULL) -/* - *---------------------------------------------------------------- - * Macro used by the Tcl core to compare Unicode strings. On big-endian - * systems we can use the more efficient memcmp, but this would not be - * lexically correct on little-endian systems. The ANSI C "prototype" for - * this macro is: - * - * MODULE_SCOPE int TclUniCharNcmp(const Tcl_UniChar *cs, - * const Tcl_UniChar *ct, unsigned long n); - *---------------------------------------------------------------- - */ - -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) -# define TclUniCharNcmp(cs,ct,n) memcmp((cs),(ct),(n)*sizeof(Tcl_UniChar)) -#else /* !WORDS_BIGENDIAN */ -# define TclUniCharNcmp Tcl_UniCharNcmp -#endif /* WORDS_BIGENDIAN */ - /* *---------------------------------------------------------------- * Macro used by the Tcl core to increment a namespace's export epoch * counter. The ANSI C "prototype" for this macro is: * Index: generic/tclObj.c ================================================================== --- generic/tclObj.c +++ generic/tclObj.c @@ -385,11 +385,13 @@ Tcl_InitHashTable(&typeTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&tableMutex); Tcl_RegisterObjType(&tclByteArrayType); Tcl_RegisterObjType(&tclDoubleType); +#if (TCL_UTF_MAX < 4) || !defined(TCL_NO_DEPRECATED) Tcl_RegisterObjType(&tclStringType); +#endif Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclDictType); Tcl_RegisterObjType(&tclByteCodeType); Tcl_RegisterObjType(&tclCmdNameType); Tcl_RegisterObjType(&tclRegexpType); Index: generic/tclProc.c ================================================================== --- generic/tclProc.c +++ generic/tclProc.c @@ -527,11 +527,11 @@ Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "FORMALARGUMENTFORMAT", NULL); goto procError; } - if ((fieldCount == 0) || (Tcl_GetCharLength(fieldValues[0]) == 0)) { + if ((fieldCount == 0) || (TclGetCharLength(fieldValues[0]) == 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "argument with no name", -1)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "FORMALARGUMENTFORMAT", NULL); goto procError; Index: generic/tclRegexp.c ================================================================== --- generic/tclRegexp.c +++ generic/tclRegexp.c @@ -269,12 +269,12 @@ if (regexpPtr->objPtr) { string = TclGetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } - *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); - *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); + *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } /* *--------------------------------------------------------------------------- @@ -480,11 +480,11 @@ */ regexpPtr->string = NULL; regexpPtr->objPtr = textObj; - udata = Tcl_GetUnicodeFromObj(textObj, &length); + udata = TclGetUnicodeFromObj_(textObj, &length); if (offset > length) { offset = length; } udata += offset; Index: generic/tclStringObj.c ================================================================== --- generic/tclStringObj.c +++ generic/tclStringObj.c @@ -67,10 +67,16 @@ static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); +#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED) +static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, + Tcl_Obj *copyPtr); +static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); +static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); +#endif #define ISCONTINUATION(bytes) (\ ((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \ && (((bytes)[1] & 0xF0) == 0xB0) && (((bytes)[2] & 0xC0) == 0x80)))) @@ -78,18 +84,174 @@ /* * The structure below defines the string Tcl object type by means of * functions that can be invoked by generic object code. */ +#if TCL_UTF_MAX < 4 + +#define tclUniCharStringType tclStringType +#define GET_UNICHAR_STRING GET_STRING +#define UniCharString String +#define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS +#define uniCharStringAlloc stringAlloc +#define uniCharStringRealloc stringRealloc +#define uniCharStringAttemptAlloc stringAttemptAlloc +#define uniCharStringAttemptRealloc stringAttemptRealloc +#define uniCharStringCheckLimits stringCheckLimits +#define SET_UNICHAR_STRING SET_STRING +#define UNICHAR_STRING_SIZE STRING_SIZE + +const Tcl_ObjType tclStringType = { + "string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ + DupStringInternalRep, /* dupIntRepProc */ + UpdateStringOfString, /* updateStringProc */ + SetStringFromAny /* setFromAnyProc */ +}; + +#else + +#ifndef TCL_NO_DEPRECATED const Tcl_ObjType tclStringType = { "string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ + DupUTF16StringInternalRep, /* dupIntRepProc */ + UpdateStringOfUTF16String, /* updateStringProc */ + SetUTF16StringFromAny /* setFromAnyProc */ +}; +#endif + +const Tcl_ObjType tclUniCharStringType = { + "utf32string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; - + +typedef struct { + int numChars; /* The number of chars in the string. -1 means + * this value has not been calculated. >= 0 + * means that there is a valid Unicode rep, or + * that the number of UTF bytes == the number + * of chars. */ + int allocated; /* The amount of space actually allocated for + * the UTF string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the unicode array. */ + int hasUnicode; /* Boolean determining whether the string has + * a Unicode representation. */ + Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. */ +} UniCharString; + +#define UNICHAR_STRING_MAXCHARS \ + (int)(((size_t)UINT_MAX - offsetof(UniCharString, unicode))/sizeof(Tcl_UniChar) - 1) +#define UNICHAR_STRING_SIZE(numChars) \ + (offsetof(UniCharString, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar))) +#define uniCharStringCheckLimits(numChars) \ + do { \ + if ((numChars) < 0 || (numChars) > UNICHAR_STRING_MAXCHARS) { \ + Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ + UNICHAR_STRING_MAXCHARS); \ + } \ + } while (0) +#define uniCharStringAttemptAlloc(numChars) \ + (UniCharString *) attemptckalloc(UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringAlloc(numChars) \ + (UniCharString *) ckalloc(UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringRealloc(ptr, numChars) \ + (UniCharString *) ckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) +#define uniCharStringAttemptRealloc(ptr, numChars) \ + (UniCharString *) attemptckrealloc((ptr), UNICHAR_STRING_SIZE(numChars)) +#define GET_UNICHAR_STRING(objPtr) \ + ((UniCharString *) (objPtr)->internalRep.twoPtrValue.ptr1) +#define SET_UNICHAR_STRING(objPtr, stringPtr) \ + ((objPtr)->internalRep.twoPtrValue.ptr2 = NULL), \ + ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) + + +#ifndef TCL_NO_DEPRECATED +static void +DupUTF16StringInternalRep( + Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have + * an internal rep of type "String". */ + Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not + * currently have an internal rep.*/ +{ + String *srcStringPtr = GET_STRING(srcPtr); + size_t size = offsetof(String, unicode) + (((srcStringPtr->allocated) + 1U) * sizeof(unsigned short)); + String *copyStringPtr = (String *)ckalloc(size); + memcpy(copyStringPtr, srcStringPtr, size); + + SET_STRING(copyPtr, copyStringPtr); + copyPtr->typePtr = &tclStringType; +} + +static int +SetUTF16StringFromAny( + TCL_UNUSED(Tcl_Interp *), + Tcl_Obj *objPtr) /* The object to convert. */ +{ + if (!TclHasInternalRep(objPtr, &tclStringType)) { + Tcl_DString ds; + + /* + * Convert whatever we have into an untyped value. Just A String. + */ + + (void) TclGetString(objPtr); + TclFreeInternalRep(objPtr); + + /* + * Create a basic String internalrep that just points to the UTF-8 string + * already in place at objPtr->bytes. + */ + + Tcl_DStringInit(&ds); + unsigned short *utf16string = Tcl_UtfToChar16DString(objPtr->bytes, objPtr->length, &ds); + int size = Tcl_DStringLength(&ds); + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + sizeof(unsigned short)) + size); + + memcpy(stringPtr->unicode, utf16string, size); + Tcl_DStringFree(&ds); + size /= sizeof(unsigned short); + stringPtr->unicode[size] = 0; + + stringPtr->numChars = size; + stringPtr->allocated = size; + stringPtr->maxChars = size; + stringPtr->hasUnicode = 1; + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + } + return TCL_OK; +} + +static void +UpdateStringOfUTF16String( + Tcl_Obj *objPtr) /* Object with string rep to update. */ +{ + Tcl_DString ds; + String *stringPtr = GET_STRING(objPtr); + + Tcl_DStringInit(&ds); + const char *string = Tcl_Char16ToUtfDString(stringPtr->unicode, stringPtr->numChars, &ds); + + char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U); + memcpy(bytes, string, Tcl_DStringLength(&ds)); + bytes[Tcl_DStringLength(&ds)] = 0; + objPtr->bytes = bytes; + objPtr->length = Tcl_DStringLength(&ds); + Tcl_DStringFree(&ds); +} +#endif + +#endif + /* * TCL STRING GROWTH ALGORITHM * * When growing strings (during an append, for example), the following growth * algorithm is used: @@ -136,11 +298,11 @@ * objPtr->typePtr == &tclStringType * needed > stringPtr->allocated * flag || objPtr->bytes != NULL */ - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); char *ptr = NULL; int attempt; if (objPtr->bytes == &tclEmptyString) { objPtr->bytes = NULL; @@ -183,51 +345,51 @@ { /* * Pre-conditions: * objPtr->typePtr == &tclStringType * needed > stringPtr->maxChars - * needed < STRING_MAXCHARS + * needed < UNICHAR_STRING_MAXCHARS */ - String *ptr = NULL, *stringPtr = GET_STRING(objPtr); + UniCharString *ptr = NULL, *stringPtr = GET_UNICHAR_STRING(objPtr); int attempt; if (stringPtr->maxChars > 0) { /* * Subsequent appends - apply the growth algorithm. */ - if (needed <= STRING_MAXCHARS / 2) { + if (needed <= UNICHAR_STRING_MAXCHARS / 2) { attempt = 2 * needed; - ptr = stringAttemptRealloc(stringPtr, attempt); + ptr = uniCharStringAttemptRealloc(stringPtr, attempt); } if (ptr == NULL) { /* * Take care computing the amount of modest growth to avoid * overflow into invalid argument values for attempt. */ - unsigned int limit = STRING_MAXCHARS - needed; + unsigned int limit = UNICHAR_STRING_MAXCHARS - needed; unsigned int extra = needed - stringPtr->numChars + TCL_MIN_UNICHAR_GROWTH; int growth = (int) ((extra > limit) ? limit : extra); attempt = needed + growth; - ptr = stringAttemptRealloc(stringPtr, attempt); + ptr = uniCharStringAttemptRealloc(stringPtr, attempt); } } if (ptr == NULL) { /* * First allocation - just big enough; or last chance fallback. */ attempt = needed; - ptr = stringRealloc(stringPtr, attempt); + ptr = uniCharStringRealloc(stringPtr, attempt); } stringPtr = ptr; stringPtr->maxChars = attempt; - SET_STRING(objPtr, stringPtr); + SET_UNICHAR_STRING(objPtr, stringPtr); } /* *---------------------------------------------------------------------- * @@ -372,11 +534,11 @@ * *--------------------------------------------------------------------------- */ Tcl_Obj * -Tcl_NewUnicodeObj( +TclNewUnicodeObj( const Tcl_UniChar *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode * string. */ { @@ -385,10 +547,39 @@ TclNewObj(objPtr); SetUnicodeObj(objPtr, unicode, numChars); return objPtr; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +Tcl_Obj * +Tcl_NewUnicodeObj( + const unsigned short *unicode, /* The unicode string used to initialize the + * new object. */ + int numChars) /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + + TclNewObj(objPtr); + TclInvalidateStringRep(objPtr); + + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + + sizeof(unsigned short)) + numChars * sizeof(unsigned short)); + memcpy(stringPtr->unicode, unicode, numChars); + stringPtr->unicode[numChars] = 0; + + stringPtr->numChars = numChars; + stringPtr->allocated = numChars; + stringPtr->maxChars = numChars; + stringPtr->hasUnicode = 1; + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + + return objPtr; +} +#endif + /* *---------------------------------------------------------------------- * * Tcl_GetCharLength -- * @@ -403,15 +594,15 @@ * *---------------------------------------------------------------------- */ int -Tcl_GetCharLength( +TclGetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ { - String *stringPtr; + UniCharString *stringPtr; int numChars; /* * Quick, no-shimmer return for short string reps. */ @@ -442,24 +633,64 @@ /* * OK, need to work with the object as a string. */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); numChars = stringPtr->numChars; /* * If numChars is unknown, compute it. */ if (numChars == -1) { - TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; } return numChars; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetCharLength +int +Tcl_GetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + int numChars; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + Tcl_GetString(objPtr); + numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + } + return numChars; +} +#endif + /* *---------------------------------------------------------------------- * * TclCheckEmptyString -- * @@ -516,10 +747,12 @@ * Fills unichar with the index'th Unicode character. * *---------------------------------------------------------------------- */ +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetUniChar int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ int index) /* Get the index'th Unicode character. */ @@ -547,26 +780,82 @@ /* * OK, need to work with the object as a string. */ - SetStringFromAny(NULL, objPtr); + SetUTF16StringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } + return ch; +} +#endif + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + int index) /* Get the index'th Unicode character. */ +{ + UniCharString *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return (int) bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); + if (stringPtr->hasUnicode == 0) { /* * If numChars is unknown, compute it. */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { return (unsigned char) objPtr->bytes[index]; } FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (index >= stringPtr->numChars) { return -1; } @@ -610,16 +899,16 @@ */ #undef Tcl_GetUnicodeFromObj #ifndef TCL_NO_DEPRECATED #undef Tcl_GetUnicode -Tcl_UniChar * +unsigned short * Tcl_GetUnicode( Tcl_Obj *objPtr) /* The object to find the unicode string * for. */ { - return Tcl_GetUnicodeFromObj(objPtr, (int *)NULL); + return TclGetUnicodeFromObj(objPtr, NULL); } #endif /* TCL_NO_DEPRECATED */ /* *---------------------------------------------------------------------- @@ -639,33 +928,55 @@ * *---------------------------------------------------------------------- */ Tcl_UniChar * +TclGetUnicodeFromObj_( + Tcl_Obj *objPtr, /* The object to find the unicode string + * for. */ + int *lengthPtr) /* If non-NULL, the location where the string + * rep's unichar length should be stored. If + * NULL, no length is stored. */ +{ + UniCharString *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); + + if (stringPtr->hasUnicode == 0) { + FillUnicodeRep(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); + } + + if (lengthPtr != NULL) { + *lengthPtr = stringPtr->numChars; + } + return stringPtr->unicode; +} + +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +unsigned short * Tcl_GetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string * rep's unichar length should be stored. If * NULL, no length is stored. */ { String *stringPtr; - SetStringFromAny(NULL, objPtr); + SetUTF16StringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (lengthPtr != NULL) { *lengthPtr = stringPtr->numChars; } return stringPtr->unicode; } -Tcl_UniChar * +#endif + +unsigned short * TclGetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ size_t *lengthPtr) /* If non-NULL, the location where the string * rep's unichar length should be stored. If @@ -674,21 +985,12 @@ String *stringPtr; SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - if (lengthPtr != NULL) { -#if TCL_MAJOR_VERSION > 8 *lengthPtr = stringPtr->numChars; -#else - *lengthPtr = ((size_t)(unsigned)(stringPtr->numChars + 1)) - 1; -#endif } return stringPtr->unicode; } /* @@ -707,18 +1009,66 @@ * Changes the internal rep of "objPtr" to the String type. * *---------------------------------------------------------------------- */ +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetRange Tcl_Obj * Tcl_GetRange( Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ int first, /* First index of the range. */ int last) /* Last index of the range. */ { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - String *stringPtr; + int length; + + if (first < 0) { + first = 0; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last < 0 || last >= length) { + last = length - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + + if (last >= numChars) { + last = numChars - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + const char *begin = Tcl_UtfAtIndex(objPtr->bytes, first); + const char *end = Tcl_UtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); +} +#endif + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + int first, /* First index of the range. */ + int last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + UniCharString *stringPtr; int length; if (first < 0) { first = 0; } @@ -744,19 +1094,19 @@ /* * OK, need to work with the object as a string. */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode == 0) { /* * If numChars is unknown, compute it. */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { if (last < 0 || last >= stringPtr->numChars) { last = stringPtr->numChars - 1; } @@ -769,16 +1119,16 @@ /* * Since we know the char length of the result, store it. */ SetStringFromAny(NULL, newObjPtr); - stringPtr = GET_STRING(newObjPtr); + stringPtr = GET_UNICHAR_STRING(newObjPtr); stringPtr->numChars = newObjPtr->length; return newObjPtr; } FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (last < 0 || last >= stringPtr->numChars) { last = stringPtr->numChars - 1; } if (last < first) { @@ -795,11 +1145,11 @@ && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { ++last; } #endif - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); + return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* *---------------------------------------------------------------------- * @@ -881,11 +1231,11 @@ * currently be shared. */ int length) /* Number of bytes desired for string * representation of object, not including * terminating null byte. */ { - String *stringPtr; + UniCharString *stringPtr; if (length < 0) { /* * Setting to a negative length is nonsense. This is probably the * result of overflowing the signed integer range. @@ -901,11 +1251,11 @@ if (objPtr->bytes && objPtr->length == length) { return; } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (objPtr->bytes != NULL) { /* * Change length of an existing string rep. */ @@ -933,14 +1283,14 @@ } else { /* * Changing length of pure unicode string. */ - stringCheckLimits(length); + uniCharStringCheckLimits(length); if (length > stringPtr->maxChars) { - stringPtr = stringRealloc(stringPtr, length); - SET_STRING(objPtr, stringPtr); + stringPtr = uniCharStringRealloc(stringPtr, length); + SET_UNICHAR_STRING(objPtr, stringPtr); stringPtr->maxChars = length; } /* * Mark the new end of the unicode string @@ -986,11 +1336,11 @@ * currently be shared. */ int length) /* Number of bytes desired for string * representation of object, not including * terminating null byte. */ { - String *stringPtr; + UniCharString *stringPtr; if (length < 0) { /* * Setting to a negative length is nonsense. This is probably the * result of overflowing the signed integer range. @@ -1004,11 +1354,11 @@ if (objPtr->bytes && objPtr->length == length) { return 1; } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (objPtr->bytes != NULL) { /* * Change length of an existing string rep. */ @@ -1043,19 +1393,19 @@ } else { /* * Changing length of pure unicode string. */ - if (length > STRING_MAXCHARS) { + if (length > UNICHAR_STRING_MAXCHARS) { return 0; } if (length > stringPtr->maxChars) { - stringPtr = stringAttemptRealloc(stringPtr, length); + stringPtr = uniCharStringAttemptRealloc(stringPtr, length); if (stringPtr == NULL) { return 0; } - SET_STRING(objPtr, stringPtr); + SET_UNICHAR_STRING(objPtr, stringPtr); stringPtr->maxChars = length; } /* * Mark the new end of the unicode string. @@ -1087,24 +1437,51 @@ * Memory allocated for new "String" internal rep. * *--------------------------------------------------------------------------- */ +#if !defined(TCL_NO_DEPRECATED) void Tcl_SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ - const Tcl_UniChar *unicode, /* The unicode string used to initialize the + const unsigned short *unicode, /* The unicode string used to initialize the * object. */ int numChars) /* Number of characters in the unicode * string. */ { - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj"); + String *stringPtr; + + if (numChars < 0) { + numChars = 0; + + if (unicode) { + while (numChars >= 0 && unicode[numChars] != 0) { + numChars++; + } + } + stringCheckLimits(numChars); } - TclFreeInternalRep(objPtr); - SetUnicodeObj(objPtr, unicode, numChars); + + /* + * Allocate enough space for the String structure + Unicode string. + */ + + stringCheckLimits(numChars); + stringPtr = stringAlloc(numChars); + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; + + stringPtr->maxChars = numChars; + memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned char)); + stringPtr->unicode[numChars] = 0; + stringPtr->numChars = numChars; + stringPtr->hasUnicode = 1; + + TclInvalidateStringRep(objPtr); + stringPtr->allocated = numChars; } +#endif static int UnicodeLength( const Tcl_UniChar *unicode) { @@ -1113,11 +1490,11 @@ if (unicode) { while (numChars >= 0 && unicode[numChars] != 0) { numChars++; } } - stringCheckLimits(numChars); + uniCharStringCheckLimits(numChars); return numChars; } static void SetUnicodeObj( @@ -1125,24 +1502,24 @@ const Tcl_UniChar *unicode, /* The unicode string used to initialize the * object. */ int numChars) /* Number of characters in the unicode * string. */ { - String *stringPtr; + UniCharString *stringPtr; if (numChars < 0) { numChars = UnicodeLength(unicode); } /* * Allocate enough space for the String structure + Unicode string. */ - stringCheckLimits(numChars); - stringPtr = stringAlloc(numChars); - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + uniCharStringCheckLimits(numChars); + stringPtr = uniCharStringAlloc(numChars); + SET_UNICHAR_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclUniCharStringType; stringPtr->maxChars = numChars; memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; @@ -1182,11 +1559,11 @@ * the object. */ const char *ellipsis) /* Ellipsis marker string, appended to the * object to indicate not all available bytes * at "bytes" were appended. */ { - String *stringPtr; + UniCharString *stringPtr; int toCopy = 0; int eLen = 0; if (length < 0) { length = (bytes ? strlen(bytes) : 0); @@ -1221,17 +1598,17 @@ if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] */ if (bytes && ISCONTINUATION(bytes)) { - Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); } if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); } else { AppendUtfToUtfRep(objPtr, bytes, toCopy); @@ -1239,11 +1616,11 @@ if (length <= limit) { return; } - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, ellipsis, eLen); } else { AppendUtfToUtfRep(objPtr, ellipsis, eLen); } @@ -1294,16 +1671,50 @@ * *---------------------------------------------------------------------- */ void -Tcl_AppendUnicodeToObj( +TclAppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* The unicode string to append to the * object. */ int length) /* Number of chars in "unicode". */ { + UniCharString *stringPtr; + + if (Tcl_IsShared(objPtr)) { + Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); + } + + if (length == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); + + /* + * If objPtr has a valid Unicode rep, then append the "unicode" to the + * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to + * objPtr's string rep. + */ + + if (stringPtr->hasUnicode) { + AppendUnicodeToUnicodeRep(objPtr, unicode, length); + } else { + AppendUnicodeToUtfRep(objPtr, unicode, length); + } +} + +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +void +Tcl_AppendUnicodeToObj( + Tcl_Obj *objPtr, /* Points to the object to append to. */ + const unsigned short *unicode, /* The unicode string to append to the + * object. */ + int length) /* Number of chars in "unicode". */ +{ String *stringPtr; if (Tcl_IsShared(objPtr)) { Tcl_Panic("%s called with shared object", "Tcl_AppendUnicodeToObj"); } @@ -1312,24 +1723,18 @@ return; } SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - - /* - * If objPtr has a valid Unicode rep, then append the "unicode" to the - * objPtr's Unicode rep, otherwise the UTF conversion of "unicode" to - * objPtr's string rep. - */ - - if (stringPtr->hasUnicode) { - AppendUnicodeToUnicodeRep(objPtr, unicode, length); - } else { - AppendUnicodeToUtfRep(objPtr, unicode, length); - } -} - + stringPtr = stringAttemptRealloc(stringPtr, stringPtr->numChars + length); + memcpy(&stringPtr->unicode[stringPtr->numChars], unicode, length); + stringPtr->maxChars = stringPtr->allocated = stringPtr->numChars += length; + stringPtr->unicode[stringPtr->numChars] = 0; + SET_STRING(objPtr, stringPtr); +} +#endif + /* *---------------------------------------------------------------------- * * Tcl_AppendObjToObj -- * @@ -1351,11 +1756,11 @@ void Tcl_AppendObjToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ Tcl_Obj *appendObjPtr) /* Object to append. */ { - String *stringPtr; + UniCharString *stringPtr; int length, numChars, appendNumChars = -1; const char *bytes; /* * Special case: second object is standard-empty is fast case. We know @@ -1424,18 +1829,18 @@ /* * Must append as strings. */ SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ if (ISCONTINUATION(TclGetString(appendObjPtr))) { - Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); } /* * If objPtr has a valid Unicode rep, then get a Unicode string from * appendObjPtr and append it. */ @@ -1443,13 +1848,13 @@ if (stringPtr->hasUnicode) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &tclStringType)) { + if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { Tcl_UniChar *unicode = - Tcl_GetUnicodeFromObj(appendObjPtr, &numChars); + TclGetUnicodeFromObj_(appendObjPtr, &numChars); AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); } else { bytes = TclGetStringFromObj(appendObjPtr, &length); AppendUtfToUnicodeRep(objPtr, bytes, length); @@ -1464,12 +1869,12 @@ */ bytes = TclGetStringFromObj(appendObjPtr, &length); numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) { - String *appendStringPtr = GET_STRING(appendObjPtr); + if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { + UniCharString *appendStringPtr = GET_UNICHAR_STRING(appendObjPtr); appendNumChars = appendStringPtr->numChars; } AppendUtfToUtfRep(objPtr, bytes, length); @@ -1500,11 +1905,11 @@ AppendUnicodeToUnicodeRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* String to append. */ int appendNumChars) /* Number of chars of "unicode" to append. */ { - String *stringPtr; + UniCharString *stringPtr; int numChars; if (appendNumChars < 0) { appendNumChars = UnicodeLength(unicode); } @@ -1511,11 +1916,11 @@ if (appendNumChars == 0) { return; } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* * If not enough space has been allocated for the unicode rep, reallocate * the internal rep object with additional space. First try to double the * required allocation; if that fails, try a more modest increase. See the @@ -1522,11 +1927,11 @@ * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an * explanation of this growth algorithm. */ numChars = stringPtr->numChars + appendNumChars; - stringCheckLimits(numChars); + uniCharStringCheckLimits(numChars); if (numChars > stringPtr->maxChars) { int offset = -1; /* @@ -1539,11 +1944,11 @@ && unicode <= stringPtr->unicode + stringPtr->maxChars) { offset = unicode - stringPtr->unicode; } GrowUnicodeBuffer(objPtr, numChars); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); /* * Relocate unicode if needed; see above. */ @@ -1589,11 +1994,11 @@ AppendUnicodeToUtfRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const Tcl_UniChar *unicode, /* String to convert to UTF. */ int numChars) /* Number of chars of "unicode" to convert. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars); if (stringPtr->numChars != -1) { stringPtr->numChars += numChars; @@ -1622,19 +2027,19 @@ AppendUtfToUnicodeRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* String to convert to Unicode. */ int numBytes) /* Number of bytes of "bytes" to convert. */ { - String *stringPtr; + UniCharString *stringPtr; if (numBytes == 0) { return; } ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1); TclInvalidateStringRep(objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); stringPtr->allocated = 0; } /* *---------------------------------------------------------------------- @@ -1658,11 +2063,11 @@ AppendUtfToUtfRep( Tcl_Obj *objPtr, /* Points to the object to append to. */ const char *bytes, /* String to append. */ int numBytes) /* Number of bytes of "bytes" to append. */ { - String *stringPtr; + UniCharString *stringPtr; int newLength, oldLength; if (numBytes == 0) { return; } @@ -1679,11 +2084,11 @@ if (numBytes > INT_MAX - oldLength) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } newLength = numBytes + oldLength; - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (newLength > stringPtr->allocated) { int offset = -1; /* * Protect against case where unicode points into the existing @@ -2083,16 +2488,16 @@ msg = "format string ended in middle of field specifier"; errCode = "INCOMPLETE"; goto errorMsg; case 's': if (gotPrecision) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); if (precision < numChars) { if (precision < 1) { TclNewObj(segment); } else { - segment = Tcl_GetRange(segment, 0, precision - 1); + segment = TclGetRange(segment, 0, precision - 1); } numChars = precision; Tcl_IncrRefCount(segment); allocSegment = 1; } @@ -2268,11 +2673,11 @@ length++; } gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2399,11 +2804,11 @@ length++; } gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } while (length < width) { Tcl_AppendToObj(segment, "0", 1); @@ -2510,11 +2915,11 @@ } goto error; } if (width>0 && numChars<0) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); } if (!gotMinus && width>0) { if (numChars < width) { limit -= width - numChars; } @@ -2871,17 +3276,17 @@ char * TclGetStringStorage( Tcl_Obj *objPtr, unsigned int *sizePtr) { - String *stringPtr; + UniCharString *stringPtr; - if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) { + if (!TclHasInternalRep(objPtr, &tclUniCharStringType) || objPtr->bytes == NULL) { return TclGetStringFromObj(objPtr, (int *)sizePtr); } - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); *sizePtr = stringPtr->allocated; return objPtr->bytes; } /* @@ -2921,12 +3326,12 @@ * Produce pure bytearray when possible. * Error on overflow. */ if (!binary) { - if (TclHasInternalRep(objPtr, &tclStringType)) { - String *stringPtr = GET_STRING(objPtr); + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode) { unichar = 1; } } } @@ -2934,11 +3339,11 @@ if (binary) { /* Result will be pure byte array. Pre-size it */ Tcl_GetByteArrayFromObj(objPtr, &length); } else if (unichar) { /* Result will be pure Tcl_UniChar array. Pre-size it. */ - Tcl_GetUnicodeFromObj(objPtr, &length); + TclGetUnicodeFromObj_(objPtr, &length); } else { /* Result will be concat of string reps. Pre-size it. */ Tcl_GetStringFromObj(objPtr, &length); } @@ -2974,11 +3379,11 @@ /* * Efficiently produce a pure Tcl_UniChar array result. */ if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); + objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length); } else { TclInvalidateStringRep(objPtr); objResultPtr = objPtr; } @@ -2985,21 +3390,21 @@ if (0 == Tcl_AttemptSetObjLength(objResultPtr, count*length)) { if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "string size overflow: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(count*length))); + UNICHAR_STRING_SIZE(count*length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; } Tcl_SetObjLength(objResultPtr, length); while (count - done > done) { Tcl_AppendObjToObj(objResultPtr, objResultPtr); done *= 2; } - Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), + TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL), (count - done) * length); } else { /* * Efficiently concatenate string reps. */ @@ -3092,19 +3497,19 @@ */ binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclUniCharStringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } } } else { /* assert (objPtr->typePtr != NULL) -- stork! */ binary = 0; - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { /* Have a pure Unicode value; ask to preserve it */ requestUniChar = 1; } else { /* Have another type; prevent shimmer */ allowUniChar = 0; @@ -3154,11 +3559,11 @@ Tcl_Obj *objPtr = *ov++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int numChars; - Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ + TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */ if (numChars) { last = objc - oc; if (length == 0) { first = last; } else if (numChars > INT_MAX - length) { @@ -3304,47 +3709,47 @@ int start; objResultPtr = *objv++; objc--; /* Ugly interface! Force resize of the unicode array. */ - Tcl_GetUnicodeFromObj(objResultPtr, &start); + TclGetUnicodeFromObj_(objResultPtr, &start); Tcl_InvalidateStringRep(objResultPtr); if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(length))); + UNICHAR_STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; } - dst = Tcl_GetUnicode(objResultPtr) + start; + dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start; } else { Tcl_UniChar ch = 0; /* Ugly interface! No scheme to init array size. */ - objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */ + objResultPtr = TclNewUnicodeObj(&ch, 0); /* PANIC? */ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { Tcl_DecrRefCount(objResultPtr); if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "concatenation failed: unable to alloc %" TCL_Z_MODIFIER "u bytes", - STRING_SIZE(length))); + UNICHAR_STRING_SIZE(length))); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return NULL; } - dst = Tcl_GetUnicode(objResultPtr); + dst = TclGetUnicodeFromObj_(objResultPtr, NULL); } while (objc--) { Tcl_Obj *objPtr = *objv++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; - Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more); memcpy(dst, src, more * sizeof(Tcl_UniChar)); dst += more; } } } else { @@ -3453,36 +3858,36 @@ */ s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &tclStringType) - && TclHasInternalRep(value2Ptr, &tclStringType)) { + } else if (TclHasInternalRep(value1Ptr, &tclUniCharStringType) + && TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { /* * Do a unicode-specific comparison if both of the args are of * String type. If the char length == byte length, we can do a * memcmp. In benchmark testing this proved the most efficient * check between the unicode and string comparison operations. */ if (nocase) { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcasecmp; + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len); + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { - s1len = Tcl_GetCharLength(value1Ptr); - s2len = Tcl_GetCharLength(value2Ptr); + s1len = TclGetCharLength(value1Ptr); + s2len = TclGetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { - s1 = (char *) Tcl_GetUnicode(value1Ptr); - s2 = (char *) Tcl_GetUnicode(value2Ptr); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) 1 #else checkEq @@ -3490,11 +3895,11 @@ ) { memCmpFn = memcmp; s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); } else { - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcmp; + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcmp; } } } } else { empty = TclCheckEmptyString(value1Ptr); @@ -3613,11 +4018,11 @@ TclStringFirst( Tcl_Obj *needle, Tcl_Obj *haystack, int start) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *endStr, *uh, *un; if (start < 0) { @@ -3676,12 +4081,12 @@ * locking down in practice more firmly just what encodings produce * what supported results for the objPtr->bytes values. For now, * do only the well-defined Tcl_UniChar array search. */ - un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); if ((lh < ln) || (start > lh - ln)) { /* Don't start the loop if there cannot be a valid answer */ goto firstEnd; } endStr = uh + lh; @@ -3720,11 +4125,11 @@ TclStringLast( Tcl_Obj *needle, Tcl_Obj *haystack, int last) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *uh, *un; if (ln == 0) { @@ -3759,12 +4164,12 @@ check--; } goto lastEnd; } - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - un = Tcl_GetUnicodeFromObj(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); if (last >= lh) { last = lh - 1; } if (last + 1 < ln) { @@ -3830,11 +4235,11 @@ Tcl_Obj * TclStringReverse( Tcl_Obj *objPtr, int flags) { - String *stringPtr; + UniCharString *stringPtr; Tcl_UniChar ch = 0; int inPlace = flags & TCL_STRING_IN_PLACE; #if TCL_UTF_MAX < 4 int needFlip = 0; #endif @@ -3849,28 +4254,28 @@ ReverseBytes(TclGetByteArrayFromObj(objPtr, NULL), from, numBytes); return objPtr; } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; if (!inPlace || Tcl_IsShared(objPtr)) { /* * Create a non-empty, pure unicode value, so we can coax * Tcl_SetObjLength into growing the unicode rep buffer. */ - objPtr = Tcl_NewUnicodeObj(&ch, 1); + objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = Tcl_GetUnicode(objPtr); - stringPtr = GET_STRING(objPtr); + to = TclGetUnicodeFromObj_(objPtr, NULL); + stringPtr = GET_UNICHAR_STRING(objPtr); while (--src >= from) { #if TCL_UTF_MAX < 4 ch = *src; if ((ch & 0xF800) == 0xD800) { needFlip = 1; @@ -4095,20 +4500,20 @@ */ /* The traditional implementation... */ { int numChars; - Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars); /* TODO: Is there an in-place option worth pursuing here? */ - result = Tcl_NewUnicodeObj(ustring, first); + result = TclNewUnicodeObj(ustring, first); if (insertPtr) { Tcl_AppendObjToObj(result, insertPtr); } if (first + count < numChars) { - Tcl_AppendUnicodeToObj(result, ustring + first + count, + TclAppendUnicodeToObj(result, ustring + first + count, numChars - first - count); } return result; } @@ -4134,11 +4539,11 @@ static void FillUnicodeRep( Tcl_Obj *objPtr) /* The object in which to fill the unicode * rep. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length, stringPtr->numChars); } @@ -4147,26 +4552,26 @@ Tcl_Obj *objPtr, const char *bytes, int numBytes, int numAppendChars) { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); int needed, numOrigChars = 0; Tcl_UniChar *dst, unichar = 0; if (stringPtr->hasUnicode) { numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { - TclNumUtfChars(numAppendChars, bytes, numBytes); + TclNumUtfCharsM(numAppendChars, bytes, numBytes); } needed = numOrigChars + numAppendChars; - stringCheckLimits(needed); + uniCharStringCheckLimits(needed); if (needed > stringPtr->maxChars) { GrowUnicodeBuffer(objPtr, needed); - stringPtr = GET_STRING(objPtr); + stringPtr = GET_UNICHAR_STRING(objPtr); } stringPtr->hasUnicode = 1; if (bytes) { stringPtr->numChars = needed; @@ -4216,12 +4621,12 @@ Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have * an internal rep of type "String". */ Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not * currently have an internal rep.*/ { - String *srcStringPtr = GET_STRING(srcPtr); - String *copyStringPtr = NULL; + UniCharString *srcStringPtr = GET_UNICHAR_STRING(srcPtr); + UniCharString *copyStringPtr = NULL; if (srcStringPtr->numChars == -1) { /* * The String struct in the source value holds zero useful data. Don't * bother copying it. Don't even bother allocating space in which to @@ -4237,21 +4642,21 @@ if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) { copyMaxChars = 2 * srcStringPtr->numChars; } else { copyMaxChars = srcStringPtr->maxChars; } - copyStringPtr = stringAttemptAlloc(copyMaxChars); + copyStringPtr = uniCharStringAttemptAlloc(copyMaxChars); if (copyStringPtr == NULL) { copyMaxChars = srcStringPtr->numChars; - copyStringPtr = stringAlloc(copyMaxChars); + copyStringPtr = uniCharStringAlloc(copyMaxChars); } copyStringPtr->maxChars = copyMaxChars; memcpy(copyStringPtr->unicode, srcStringPtr->unicode, srcStringPtr->numChars * sizeof(Tcl_UniChar)); copyStringPtr->unicode[srcStringPtr->numChars] = 0; } else { - copyStringPtr = stringAlloc(0); + copyStringPtr = uniCharStringAlloc(0); copyStringPtr->maxChars = 0; copyStringPtr->unicode[0] = 0; } copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; copyStringPtr->numChars = srcStringPtr->numChars; @@ -4262,12 +4667,12 @@ * source object. */ copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; - SET_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclStringType; + SET_UNICHAR_STRING(copyPtr, copyStringPtr); + copyPtr->typePtr = &tclUniCharStringType; } /* *---------------------------------------------------------------------- * @@ -4288,12 +4693,12 @@ static int SetStringFromAny( TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &tclStringType)) { - String *stringPtr = stringAlloc(0); + if (!TclHasInternalRep(objPtr, &tclUniCharStringType)) { + UniCharString *stringPtr = uniCharStringAlloc(0); /* * Convert whatever we have into an untyped value. Just A String. */ @@ -4307,12 +4712,12 @@ stringPtr->numChars = -1; stringPtr->allocated = objPtr->length; stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; - SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + SET_UNICHAR_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclUniCharStringType; } return TCL_OK; } /* @@ -4335,11 +4740,11 @@ static void UpdateStringOfString( Tcl_Obj *objPtr) /* Object with string rep to update. */ { - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); /* * This routine is only called when we need to generate the * string rep objPtr->bytes because it does not exist -- it is NULL. * In that circumstance, any lingering claim about the size of @@ -4367,11 +4772,11 @@ * Pre-condition: this is the "string" Tcl_ObjType. */ int i, origLength, size = 0; char *dst; - String *stringPtr = GET_STRING(objPtr); + UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); if (numChars < 0) { numChars = UnicodeLength(unicode); } @@ -4421,11 +4826,11 @@ /* *---------------------------------------------------------------------- * * FreeStringInternalRep -- * - * Deallocate the storage associated with a String data object's internal + * Deallocate the storage associated with a (UniChar)String data object's internal * representation. * * Results: * None. * Index: generic/tclStringRep.h ================================================================== --- generic/tclStringRep.h +++ generic/tclStringRep.h @@ -37,15 +37,10 @@ * track of how much memory has been used and how much has been allocated for * the Unicode and UTF string to enable growing and shrinking of the UTF and * Unicode reps of the String object with fewer mallocs. To optimize string * length and indexing operations, this structure also stores the number of * characters (same of UTF and Unicode!) once that value has been computed. - * - * Under normal configurations, what Tcl calls "Unicode" is actually UTF-16 - * restricted to the Basic Multilingual Plane (i.e. U+00000 to U+0FFFF). This - * can be officially modified by altering the definition of Tcl_UniChar in - * tcl.h, but do not do that unless you are sure what you're doing! */ typedef struct { int numChars; /* The number of chars in the string. -1 means * this value has not been calculated. >= 0 @@ -57,19 +52,19 @@ * termination char). */ int maxChars; /* Max number of chars that can fit in the * space allocated for the unicode array. */ int hasUnicode; /* Boolean determining whether the string has * a Unicode representation. */ - Tcl_UniChar unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size * of this field depends on the 'maxChars' * field above. */ } String; #define STRING_MAXCHARS \ - (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(Tcl_UniChar) - 1) + (int)(((size_t)UINT_MAX - offsetof(String, unicode))/sizeof(unsigned short) - 1) #define STRING_SIZE(numChars) \ - (offsetof(String, unicode) + sizeof(Tcl_UniChar) + ((numChars) * sizeof(Tcl_UniChar))) + (offsetof(String, unicode) + sizeof(unsigned short) + ((numChars) * sizeof(unsigned short))) #define stringCheckLimits(numChars) \ do { \ if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ STRING_MAXCHARS); \ Index: generic/tclStubInit.c ================================================================== --- generic/tclStubInit.c +++ generic/tclStubInit.c @@ -46,10 +46,12 @@ #undef Tcl_SetUnicodeObj #undef Tcl_UniCharNcasecmp #undef Tcl_UniCharCaseMatch #undef Tcl_UniCharLen #undef Tcl_UniCharNcmp +#undef Tcl_GetRange +#undef Tcl_GetUniChar #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry #undef Tcl_CreateHashEntry #undef Tcl_Panic @@ -74,28 +76,35 @@ #undef Tcl_UtfToUniCharDString #undef Tcl_UtfToUniChar #undef Tcl_MacOSXOpenBundleResources #undef TclWinConvertWSAError #undef TclWinConvertError +#undef Tcl_GetCharLength +#undef Tcl_UtfAtIndex + #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError #endif -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED) static void uniCodePanic(void) { - Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX); -} -# define Tcl_GetUnicode (Tcl_UniChar *(*)(Tcl_Obj *))(void *)uniCodePanic -# define Tcl_GetUnicodeFromObj (Tcl_UniChar *(*)(Tcl_Obj *, int *))(void *)uniCodePanic -# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int))(void *)uniCodePanic -# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic -# define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic + Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)"); +} +# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_GetUnicodeFromObj (unsigned short *(*)(Tcl_Obj *, int *))(void *)uniCodePanic +# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const unsigned short *, int))(void *)uniCodePanic +# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_UtfAtIndex (const char *(*)(const char *, int))(void *)uniCodePanic +# define Tcl_GetCharLength (int(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_UniCharNcmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_GetRange (Tcl_Obj *(*)(Tcl_Obj *, int, int))(void *)uniCodePanic +# define Tcl_GetUniChar (int(*)(Tcl_Obj *, int))(void *)uniCodePanic #endif #define TclUtfCharComplete UtfCharComplete #define TclUtfNext UtfNext #define TclUtfPrev UtfPrev @@ -681,12 +690,12 @@ # define TclpLocaltime_unix 0 # define TclpGmtime_unix 0 # define Tcl_SetExitProc 0 # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 -# define Tcl_GetUnicode 0 #if TCL_UTF_MAX < 4 +# define Tcl_GetUnicode 0 # define Tcl_AppendUnicodeToObj 0 # define Tcl_UniCharCaseMatch 0 # define Tcl_UniCharNcasecmp 0 # define Tcl_UniCharNcmp 0 #endif @@ -1948,8 +1957,13 @@ 0, /* 664 */ 0, /* 665 */ 0, /* 666 */ 0, /* 667 */ Tcl_UniCharLen, /* 668 */ + TclNumUtfChars, /* 669 */ + TclGetCharLength, /* 670 */ + TclUtfAtIndex, /* 671 */ + TclGetRange, /* 672 */ + TclGetUniChar, /* 673 */ }; /* !END!: Do not edit above this line. */ Index: generic/tclTest.c ================================================================== --- generic/tclTest.c +++ generic/tclTest.c @@ -14,14 +14,17 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #undef STATIC_BUILD +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif -#ifndef TCL_NO_DEPRECATED +#ifdef TCL_NO_DEPRECATED +# define TCL_UTF_MAX 4 +#else # define TCL_NO_DEPRECATED #endif #include "tclInt.h" #ifdef TCL_WITH_EXTERNAL_TOMMATH # include "tommath.h" @@ -6967,11 +6970,11 @@ TCL_UNUSED(void *), Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]) { - size_t numBytes; + int numBytes; char *bytes; const char *result, *first; char buffer[32]; static const char tobetested[] = "A\xA0\xC0\xC1\xC2\xD0\xE0\xE8\xF2\xF7\xF8\xFE\xFF"; const char *p = tobetested; Index: generic/tclTestObj.c ================================================================== --- generic/tclTestObj.c +++ generic/tclTestObj.c @@ -11,11 +11,11 @@ * Copyright © 2005 Kevin B. Kenny. All rights reserved. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ - +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif #include "tclInt.h" #ifdef TCL_WITH_EXTERNAL_TOMMATH @@ -1071,12 +1071,13 @@ } if (objv[2]->typePtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); } else { typeName = objv[2]->typePtr->name; + if (!strcmp(typeName, "utf32string")) typeName = "string"; #ifndef TCL_WIDE_INT_IS_LONG - if (!strcmp(typeName, "wideInt")) typeName = "int"; + else if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); } } else if (strcmp(subCmd, "refcount") == 0) { if (objc != 3) { @@ -1151,11 +1152,11 @@ TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *unicode; + unsigned short *unicode; size_t varIndex; int size, option, i; Tcl_WideInt length; #define MAX_STRINGS 11 const char *string, *strings[MAX_STRINGS+1]; @@ -1261,14 +1262,18 @@ case 5: /* length2 */ if (objc != 3) { goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = (int) strPtr->allocated; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex], objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = (int) strPtr->allocated; + } else { + length = -1; + } } else { length = -1; } Tcl_SetWideIntObj(Tcl_GetObjResult(interp), length); break; @@ -1315,14 +1320,18 @@ case 9: /* maxchars */ if (objc != 3) { goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = strPtr->maxChars; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex],objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = strPtr->maxChars; + } else { + length = -1; + } } else { length = -1; } Tcl_SetWideIntObj(Tcl_GetObjResult(interp), length); break; Index: generic/tclUtf.c ================================================================== --- generic/tclUtf.c +++ generic/tclUtf.c @@ -797,11 +797,11 @@ * *--------------------------------------------------------------------------- */ int -Tcl_NumUtfChars( +TclNumUtfChars( const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { Tcl_UniChar ch = 0; @@ -848,10 +848,65 @@ } } return i; } +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_NumUtfChars +int +Tcl_NumUtfChars( + const char *src, /* The UTF-8 string to measure. */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ +{ + unsigned short ch = 0; + int i = 0; + + if (length < 0) { + /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ + while ((*src != '\0') && (i < INT_MAX)) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + } else { + /* Will return value between 0 and length. No overflow checks. */ + + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - 4; + + /* + * Optimize away the call in this loop. Justified because... + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX + * So (endPtr - src) >= TCL_UTF_MAX, and passing that to + * Tcl_UtfCharComplete we know will cause return of 1. + */ + while (src <= optPtr + /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + /* Loop over the remaining string where call must happen */ + while (src < endPtr) { + if (Tcl_UtfCharComplete(src, endPtr - src)) { + src += Tcl_UtfToChar16(src, &ch); + } else { + /* + * src points to incomplete UTF-8 sequence + * Treat first byte as character and count it + */ + src++; + } + i++; + } + } + return i; +} +#endif + /* *--------------------------------------------------------------------------- * * Tcl_UtfFindFirst -- * @@ -1125,26 +1180,24 @@ int Tcl_UniCharAtIndex( const char *src, /* The UTF-8 string to dereference. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + unsigned short ch = 0; int i = 0; if (index < 0) { return -1; } while (index-- > 0) { - i = TclUtfToUniChar(src, &ch); + i = Tcl_UtfToChar16(src, &ch); src += i; } -#if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (i < 3)) { /* Index points at character following high Surrogate */ return -1; } -#endif TclUtfToUCS4(src, &i); return i; } /* @@ -1163,26 +1216,52 @@ * Side effects: * None. * *--------------------------------------------------------------------------- */ + +#if TCL_UTF_MAX < 4 +# undef Tcl_UtfToUniChar +# define Tcl_UtfToUniChar Tcl_UtfToChar16 +#endif const char * -Tcl_UtfAtIndex( +TclUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch = 0; int len = 0; while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); + len = (Tcl_UtfToUniChar)(src, &ch); src += len; } #if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ - src += TclUtfToUniChar(src, &ch); + src += (Tcl_UtfToUniChar)(src, &ch); } #endif return src; } + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_UtfAtIndex +const char * +Tcl_UtfAtIndex( + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ +{ + unsigned short ch = 0; + int len = 0; + + while (index-- > 0) { + len = Tcl_UtfToChar16(src, &ch); + src += len; + } + if ((ch >= 0xD800) && (len < 3)) { + /* Index points at character following high Surrogate */ + src += Tcl_UtfToChar16(src, &ch); + } + return src; +} @@ -1189,6 +1268,9 @@ - + + +#endif + /* *--------------------------------------------------------------------------- * * Tcl_UtfBackslash -- * @@ -1847,11 +1929,11 @@ * *---------------------------------------------------------------------- */ int -Tcl_UniCharNcmp( +TclUniCharNcmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) @@ -1866,25 +1948,51 @@ * We can't simply call memcmp() because that is not lexically correct. */ for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { -#if TCL_UTF_MAX < 4 + return (*ucs - *uct); + } + } + return 0; +#endif /* WORDS_BIGENDIAN */ +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharNcmp( + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) + /* + * We are definitely on a big-endian machine; memcmp() is safe + */ + + return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); + +#else /* !WORDS_BIGENDIAN */ + /* + * We can't simply call memcmp() because that is not lexically correct. + */ + + for ( ; numChars != 0; ucs++, uct++, numChars--) { + if (*ucs != *uct) { /* special case for handling upper surrogates */ if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) { return 1; } else if (((*uct & 0xFC00) == 0xD800)) { return -1; } -#endif return (*ucs - *uct); } } return 0; #endif /* WORDS_BIGENDIAN */ } - +#endif /* *---------------------------------------------------------------------- * * Tcl_UniCharNcasecmp -- * @@ -1900,35 +2008,55 @@ * *---------------------------------------------------------------------- */ int -Tcl_UniCharNcasecmp( +TclUniCharNcasecmp( const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); - Tcl_UniChar lct = Tcl_UniCharToLower(*uct); + int lcs = Tcl_UniCharToLower(*ucs); + int lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { -#if TCL_UTF_MAX < 4 + return (lcs - lct); + } + } + } + return 0; +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharNcasecmp( + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ + for ( ; numChars != 0; numChars--, ucs++, uct++) { + if (*ucs != *uct) { + unsigned short lcs = Tcl_UniCharToLower(*ucs); + unsigned short lct = Tcl_UniCharToLower(*uct); + + if (lcs != lct) { /* special case for handling upper surrogates */ if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { return 1; } else if (((lct & 0xFC00) == 0xD800)) { return -1; } -#endif return (lcs - lct); } } } return 0; } +#endif + /* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlnum -- @@ -2288,18 +2416,186 @@ * *---------------------------------------------------------------------- */ int -Tcl_UniCharCaseMatch( +TclUniCharCaseMatch( const Tcl_UniChar *uniStr, /* Unicode String. */ const Tcl_UniChar *uniPattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - Tcl_UniChar ch1 = 0, p; + int ch1 = 0, p; + + while (1) { + p = *uniPattern; + + /* + * See if we're at the end of both the pattern and the string. If so, + * we succeeded. If we're at the end of the pattern but not at the end + * of the string, we failed. + */ + + if (p == 0) { + return (*uniStr == 0); + } + if ((*uniStr == 0) && (p != '*')) { + return 0; + } + + /* + * Check for a "*" as the next pattern character. It matches any + * substring. We handle this by skipping all the characters up to the + * next matching one in the pattern, and then calling ourselves + * recursively for each postfix of string, until either we match or we + * reach the end of the string. + */ + + if (p == '*') { + /* + * Skip all successive *'s in the pattern + */ + + while (*(++uniPattern) == '*') { + /* empty body */ + } + p = *uniPattern; + if (p == 0) { + return 1; + } + if (nocase) { + p = Tcl_UniCharToLower(p); + } + while (1) { + /* + * Optimization for matching - cruise through the string + * quickly if the next char in the pattern isn't a special + * character + */ + + if ((p != '[') && (p != '?') && (p != '\\')) { + if (nocase) { + while (*uniStr && (p != *uniStr) + && (p != Tcl_UniCharToLower(*uniStr))) { + uniStr++; + } + } else { + while (*uniStr && (p != *uniStr)) { + uniStr++; + } + } + } + if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) { + return 1; + } + if (*uniStr == 0) { + return 0; + } + uniStr++; + } + } + + /* + * Check for a "?" as the next pattern character. It matches any + * single character. + */ + + if (p == '?') { + uniPattern++; + uniStr++; + continue; + } + + /* + * Check for a "[" as the next pattern character. It is followed by a + * list of characters that are acceptable, or by a range (two + * characters separated by "-"). + */ + + if (p == '[') { + int startChar, endChar; + + uniPattern++; + ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); + uniStr++; + while (1) { + if ((*uniPattern == ']') || (*uniPattern == 0)) { + return 0; + } + startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (*uniPattern == '-') { + uniPattern++; + if (*uniPattern == 0) { + return 0; + } + endChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (((startChar <= ch1) && (ch1 <= endChar)) + || ((endChar <= ch1) && (ch1 <= startChar))) { + /* + * Matches ranges of form [a-z] or [z-a]. + */ + break; + } + } else if (startChar == ch1) { + break; + } + } + while (*uniPattern != ']') { + if (*uniPattern == 0) { + uniPattern--; + break; + } + uniPattern++; + } + uniPattern++; + continue; + } + + /* + * If the next pattern character is '\', just strip off the '\' so we + * do exact matching on the character that follows. + */ + + if (p == '\\') { + if (*(++uniPattern) == '\0') { + return 0; + } + } + + /* + * There's no special character. Just make sure that the next bytes of + * each string match. + */ + + if (nocase) { + if (Tcl_UniCharToLower(*uniStr) != + Tcl_UniCharToLower(*uniPattern)) { + return 0; + } + } else if (*uniStr != *uniPattern) { + return 0; + } + uniStr++; + uniPattern++; + } +} + +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +int +Tcl_UniCharCaseMatch( + const unsigned short *uniStr, /* Unicode String. */ + const unsigned short *uniPattern, + /* Pattern, which may contain special + * characters. */ + int nocase) /* 0 for case sensitive, 1 for insensitive */ +{ + unsigned short ch1 = 0, p; while (1) { p = *uniPattern; /* @@ -2383,11 +2679,11 @@ * list of characters that are acceptable, or by a range (two * characters separated by "-"). */ if (p == '[') { - Tcl_UniChar startChar, endChar; + unsigned short startChar, endChar; uniPattern++; ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); uniStr++; while (1) { @@ -2453,11 +2749,13 @@ } uniStr++; uniPattern++; } } +#endif + /* *---------------------------------------------------------------------- * * TclUniCharMatch -- * @@ -2678,11 +2976,11 @@ TclUtfToUCS4( const char *src, /* The UTF-8 string. */ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { - /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ +# undef Tcl_UtfToUniChar return Tcl_UtfToUniChar(src, ucs4Ptr); } int TclUniCharToUCS4( Index: generic/tclUtil.c ================================================================== --- generic/tclUtil.c +++ generic/tclUtil.c @@ -2589,15 +2589,15 @@ * XXX: Currently doesn't take advantage of exact-ness that * XXX: TclReToGlob tells us about trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj)); */ - if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) { + if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) { Tcl_UniChar *udata, *uptn; - udata = Tcl_GetUnicodeFromObj(strObj, &length); - uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); + udata = TclGetUnicodeFromObj_(strObj, &length); + uptn = TclGetUnicodeFromObj_(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj) && !flags) { unsigned char *data, *ptn; Index: tests/obj.test ================================================================== --- tests/obj.test +++ tests/obj.test @@ -16,16 +16,18 @@ namespace import -force ::tcltest::* } ::tcltest::loadTestedCommands catch [list package require -exact tcl::test [info patchlevel]] + +package require tcltests testConstraint testobj [llength [info commands testobj]] testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}] -test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} testobj { +test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} {testobj deprecated} { set r 1 foreach {t} { bytearray bytecode cmdName Index: tests/string.test ================================================================== --- tests/string.test +++ tests/string.test @@ -420,11 +420,11 @@ set s hello regexp ll $s m # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{string 1} {string 0} 2} +} -match glob -result {{*string 1} {*string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} test string-4.18.$noComp {string first, corner case} -body { run {string first a aaa -1} Index: tests/stringObj.test ================================================================== --- tests/stringObj.test +++ tests/stringObj.test @@ -23,12 +23,13 @@ testConstraint testobj [llength [info commands testobj]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testdstring [llength [info commands testdstring]] testConstraint tip389 [expr {[string length \U010000] == 2}] - -test stringObj-1.1 {string type registration} testobj { +testConstraint utf32 [expr {[string length [format %c 0x10000]] == 1}] + +test stringObj-1.1 {string type registration} {testobj deprecated} { set t [testobj types] set first [string first "string" $t] set result [expr {$first >= 0}] } 1 @@ -55,31 +56,31 @@ lappend result [teststringobj set 1 foo] ;# makes existing obj a string lappend result [testobj type 1] lappend result [testobj refcount 1] } {{} 512 foo string 2} -test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} testobj { +test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 test teststringobj setlength 1 3 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {3 4 tes} -test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj { +} {3 3 tes} +test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} {testobj deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj setlength 1 10 list [teststringobj length 1] [teststringobj length2 1] } {10 10} -test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} testobj { +test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj append 1 xyzq -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {10 20 abcdefxyzq} -test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} testobj { +} {10 10 abcdefxyzq} +test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj setlength 1 0 list [teststringobj length2 1] [teststringobj get 1] } {0 {}} @@ -95,11 +96,11 @@ teststringobj set 1 {x y } teststringobj append 1 bbCCddEE 4 teststringobj append 1 123 -1 teststringobj get 1 } {x y bbCC123} -test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} testobj { +test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 xyz teststringobj setlength 1 15 teststringobj setlength 1 2 set result {} @@ -107,11 +108,11 @@ lappend result [teststringobj length 1] [teststringobj length2 1] teststringobj setlength 1 10 teststringobj append 1 abcdef -1 lappend result [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {15 15 16 32 xy12345678abcdef} +} {15 15 16 16 xy12345678abcdef} test stringObj-6.1 {Tcl_AppendStringsToObj procedure, type conversion} testobj { testobj freeallvars teststringobj set2 1 [list a b] teststringobj appendstrings 1 xyz { 1234 } foo @@ -133,35 +134,35 @@ testobj freeallvars teststringobj set 1 abc teststringobj appendstrings 1 { 123 } abcdefg list [teststringobj length 1] [teststringobj get 1] } {15 {abc 123 abcdefg}} -test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} testobj { +test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32 deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 123 abcdefg list [teststringobj length 1] [teststringobj length2 1] [teststringobj get 1] -} {10 20 123abcdefg} -test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testobj { +} {10 10 123abcdefg} +test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 teststringobj setlength 1 2 teststringobj appendstrings 1 34567890 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {10 10 ab34567890} -test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} testobj { +test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 teststringobj setlength 1 2 teststringobj appendstrings 1 34567890x list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {11 22 ab34567890x} -test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} testobj { +} {11 11 ab34567890x} +test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} list [teststringobj length2 1] [teststringobj get 1] } {0 {}} @@ -170,18 +171,18 @@ teststringobj set2 1 [string replace abc 1 1 d] teststringobj appendstrings 1 foo bar soom teststringobj get 1 } adcfoobarsoom -test stringObj-7.1 {SetStringFromAny procedure} testobj { +test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set2 1 [list a b] teststringobj append 1 x -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] -} {4 8 {a bx}} -test stringObj-7.2 {SetStringFromAny procedure, null object} testobj { +} {4 4 {a bx}} +test stringObj-7.2 {SetStringFromAny procedure, null object} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] @@ -195,20 +196,20 @@ set x "abcdef" list [string length $x] [testobj objtype $x] \ [string length $x] [testobj objtype $x] } {6 string 6 string} -test stringObj-8.1 {DupStringInternalRep procedure} testobj { +test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 {} teststringobj append 1 abcde -1 testobj duplicate 1 2 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj maxchars 1] [teststringobj get 1] \ [teststringobj length 2] [teststringobj length2 2] \ [teststringobj maxchars 2] [teststringobj get 2] -} {5 10 0 abcde 5 5 0 abcde} +} {5 5 5 abcde 5 5 5 abcde} test stringObj-8.2 {DupUnicodeInternalRep, mixed width chars} testobj { set x abc\xEF\xBF\xAEghi string length $x set y $x list [testobj objtype $x] [testobj objtype $y] [append x "\xAE\xBF\xEF"] \ Index: tests/utf.test ================================================================== --- tests/utf.test +++ tests/utf.test @@ -1228,11 +1228,11 @@ } -result {1 4} test utf-20.1 {TclUniCharNcmp} utf32 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] string range $one 0 0 string range $two 0 0 Index: win/makefile.vc ================================================================== --- win/makefile.vc +++ win/makefile.vc @@ -50,11 +50,11 @@ # SDK (not expressly needed), run setenv.bat after # vcvars32.bat according to the instructions for it. This can also # turn on the 64-bit compiler, if your SDK has it. # # Basic macros and options usable on the commandline (see rules.vc for more info): -# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utfmax,none +# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none # Sets special options for the core. The default is for none. # Any combination of the above may be used (comma separated). # 'none' will over-ride everything to nothing. # # noembed = Without this option, the Tcl core library scripts @@ -78,11 +78,11 @@ # time64bit = Forces a build using 64-bit time_t for 32-bit build # (CRT library should support this). # unchecked = Allows a symbols build to not use the debug # enabled runtime (msvcrt.dll not msvcrtd.dll # or libcmt.lib not libcmtd.lib). -# utfmax = Forces a build using UTF-32 representation internally. +# utf16 = Forces a build using UTF-16 representation internally. # # STATS=compdbg,memdbg,none # Sets optional memory and bytecode compiler debugging code added # to the core. The default is for none. Any combination of the # above may be used (comma separated). 'none' will over-ride Index: win/rules.vc ================================================================== --- win/rules.vc +++ win/rules.vc @@ -814,12 +814,11 @@ # USE_STUBS - 1 -> compile to use stubs interfaces, 0 -> direct linking # CONFIG_CHECK - 1 -> check current build configuration against Tcl # configuration (ignored for Tcl itself) # _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build # (CRT library should support this, not needed for Tcl 9.x) -# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally. -# (Not needed for Tcl 9.x) +# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended). # Further, LINKERFLAGS are modified based on above. # Default values for all the above STATIC_BUILD = 0 TCL_THREADS = 1 @@ -882,13 +881,13 @@ !if [nmakehlp -f $(OPTS) "time64bit"] !message *** Force 64-bit time_t _USE_64BIT_TIME_T = 1 !endif -!if [nmakehlp -f $(OPTS) "utfmax"] -!message *** Force allowing 4-byte UTF-8 sequences internally -TCL_UTF_MAX = 4 +!if [nmakehlp -f $(OPTS) "utf16"] +!message *** Force UTF-16 internally +TCL_UTF_MAX = 3 !endif !endif # Yes, it's weird that the "symbols" option controls DEBUG and # the "pdbs" option controls SYMBOLS. That's historical. @@ -1421,17 +1420,17 @@ !if "$(TCL_MAJOR_VERSION)" == "8" !if "$(_USE_64BIT_TIME_T)" == "1" OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1 !endif -!if "$(TCL_UTF_MAX)" == "4" -OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4 -!endif # _ATL_XP_TARGETING - Newer SDK's need this to build for XP COMPILERFLAGS = /D_ATL_XP_TARGETING !endif +!if "$(TCL_UTF_MAX)" == "3" +OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3 +!endif # Like the TEA system only set this non empty for non-Tk extensions # Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME # so we pass both !if !$(DOING_TCL) && !$(DOING_TK)