Tcl Source Code

Check-in [d224d38a6d]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Somewhat simplified implementation of TIP #389, in which the "string length" if characters > U+FFFF is considered to be 2, not 1.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-389
Files: files | file ages | folders
SHA3-256: d224d38a6d6be041c6cfe6d4699f6070cfe230411e0e2bd147ef6cb8a7f9d994
User & Date: jan.nijtmans 2017-11-07 12:15:30
Context
2017-11-09
16:07
merge core-8-branch check-in: 05952d6309 user: jan.nijtmans tags: tip-389
2017-11-07
12:15
Somewhat simplified implementation of TIP #389, in which the "string length" if characters > U+FFFF ... check-in: d224d38a6d user: jan.nijtmans tags: tip-389
2017-11-06
14:46
Fix version number of Windows Help file (not sure if it's actually used, but better safe than sorry) check-in: 8fcf451885 user: jan.nijtmans tags: core-8-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to doc/StringObj.3.

33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
.sp
Tcl_UniChar *
\fBTcl_GetUnicodeFromObj\fR(\fIobjPtr, lengthPtr\fR)
.sp
Tcl_UniChar *
\fBTcl_GetUnicode\fR(\fIobjPtr\fR)
.sp
Tcl_UniChar
\fBTcl_GetUniChar\fR(\fIobjPtr, index\fR)
.sp
int
\fBTcl_GetCharLength\fR(\fIobjPtr\fR)
.sp
Tcl_Obj *
\fBTcl_GetRange\fR(\fIobjPtr, first, last\fR)






|







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
.sp
Tcl_UniChar *
\fBTcl_GetUnicodeFromObj\fR(\fIobjPtr, lengthPtr\fR)
.sp
Tcl_UniChar *
\fBTcl_GetUnicode\fR(\fIobjPtr\fR)
.sp
int
\fBTcl_GetUniChar\fR(\fIobjPtr, index\fR)
.sp
int
\fBTcl_GetCharLength\fR(\fIobjPtr\fR)
.sp
Tcl_Obj *
\fBTcl_GetRange\fR(\fIobjPtr, first, last\fR)

Changes to doc/ToUpper.3.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
.BS
.SH NAME
Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
Tcl_UniChar
\fBTcl_UniCharToUpper\fR(\fIch\fR)
.sp
Tcl_UniChar
\fBTcl_UniCharToLower\fR(\fIch\fR)
.sp
Tcl_UniChar
\fBTcl_UniCharToTitle\fR(\fIch\fR)






|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
.BS
.SH NAME
Tcl_UniCharToUpper, Tcl_UniCharToLower, Tcl_UniCharToTitle, Tcl_UtfToUpper, Tcl_UtfToLower, Tcl_UtfToTitle \- routines for manipulating the case of Unicode characters and UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
int
\fBTcl_UniCharToUpper\fR(\fIch\fR)
.sp
Tcl_UniChar
\fBTcl_UniCharToLower\fR(\fIch\fR)
.sp
Tcl_UniChar
\fBTcl_UniCharToTitle\fR(\fIch\fR)

Changes to doc/Utf.3.

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
.sp
const char *
\fBTcl_UtfNext\fR(\fIsrc\fR)
.sp
const char *
\fBTcl_UtfPrev\fR(\fIsrc, start\fR)
.sp
Tcl_UniChar
\fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR)
.sp
const char *
\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
.sp
int
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)






|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
.sp
const char *
\fBTcl_UtfNext\fR(\fIsrc\fR)
.sp
const char *
\fBTcl_UtfPrev\fR(\fIsrc, start\fR)
.sp
int
\fBTcl_UniCharAtIndex\fR(\fIsrc, index\fR)
.sp
const char *
\fBTcl_UtfAtIndex\fR(\fIsrc, index\fR)
.sp
int
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)

Changes to generic/tcl.decls.

1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
....
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
    void Tcl_ThreadAlert(Tcl_ThreadId threadId)
}
declare 319 {
    void Tcl_ThreadQueueEvent(Tcl_ThreadId threadId, Tcl_Event *evPtr,
	    Tcl_QueuePosition position)
}
declare 320 {
    Tcl_UniChar Tcl_UniCharAtIndex(const char *src, int index)
}
declare 321 {
    Tcl_UniChar Tcl_UniCharToLower(int ch)
}
declare 322 {
    Tcl_UniChar Tcl_UniCharToTitle(int ch)
}
declare 323 {
    Tcl_UniChar Tcl_UniCharToUpper(int ch)
}
declare 324 {
    int Tcl_UniCharToUtf(int ch, char *buf)
}
declare 325 {
    CONST84_RETURN char *Tcl_UtfAtIndex(const char *src, int index)
}
................................................................................
    void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
	    int numChars)
}
declare 380 {
    int Tcl_GetCharLength(Tcl_Obj *objPtr)
}
declare 381 {
    Tcl_UniChar Tcl_GetUniChar(Tcl_Obj *objPtr, int index)
}
declare 382 {
    Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr)
}
declare 383 {
    Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}






|


|


|


|







 







|







1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
....
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
    void Tcl_ThreadAlert(Tcl_ThreadId threadId)
}
declare 319 {
    void Tcl_ThreadQueueEvent(Tcl_ThreadId threadId, Tcl_Event *evPtr,
	    Tcl_QueuePosition position)
}
declare 320 {
    int Tcl_UniCharAtIndex(const char *src, int index)
}
declare 321 {
    int Tcl_UniCharToLower(int ch)
}
declare 322 {
    int Tcl_UniCharToTitle(int ch)
}
declare 323 {
    int Tcl_UniCharToUpper(int ch)
}
declare 324 {
    int Tcl_UniCharToUtf(int ch, char *buf)
}
declare 325 {
    CONST84_RETURN char *Tcl_UtfAtIndex(const char *src, int index)
}
................................................................................
    void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
	    int numChars)
}
declare 380 {
    int Tcl_GetCharLength(Tcl_Obj *objPtr)
}
declare 381 {
    int Tcl_GetUniChar(Tcl_Obj *objPtr, int index)
}
declare 382 {
    Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr)
}
declare 383 {
    Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}

Changes to generic/tcl.h.

2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
 * 4, then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6,
 * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode
 * is the default and recommended mode. UCS-4 is experimental and not
 * recommended. It works for the core, but most extensions expect UCS-2.
 */

#ifndef TCL_UTF_MAX
#define TCL_UTF_MAX		3
#endif

/*
 * This represents a Unicode character. Any changes to this should also be
 * reflected in regcustom.h.
 */







|







2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
 * 4, then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6,
 * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode
 * is the default and recommended mode. UCS-4 is experimental and not
 * recommended. It works for the core, but most extensions expect UCS-2.
 */

#ifndef TCL_UTF_MAX
#define TCL_UTF_MAX		4
#endif

/*
 * This represents a Unicode character. Any changes to this should also be
 * reflected in regcustom.h.
 */

Changes to generic/tclCmdMZ.c.

305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
....
1213
1214
1215
1216
1217
1218
1219






1220
1221
1222
1223
1224
1225
1226
....
1810
1811
1812
1813
1814
1815
1816

1817







1818
1819
1820
1821
1822
1823
1824
1825
	 * start of the string unless the previous character is a newline.
	 */

	if (offset == 0) {
	    eflags = 0;
	} else if (offset > stringLength) {
	    eflags = TCL_REG_NOTBOL;
	} else if (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n') {
	    eflags = 0;
	} else {
	    eflags = TCL_REG_NOTBOL;
	}

	match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
		numMatchesSaved, eflags);
................................................................................
	 * megabyte range!) - DKF
	 */

	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);

	for ( ; stringPtr < end; stringPtr += len) {
	    len = TclUtfToUniChar(stringPtr, &ch);







	    /*
	     * Assume Tcl_UniChar is an integral type...
	     */

	    hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR((int) ch),
		    &isNew);
................................................................................
	    if (strict) {
		result = 0;
	    }
	    goto str_is_done;
	}
	end = string1 + length1;
	for (; string1 < end; string1 += length2, failat++) {

	    length2 = TclUtfToUniChar(string1, &ch);







	    if (!chcomp(ch)) {
		result = 0;
		break;
	    }
	}
    }

    /*






|







 







>
>
>
>
>
>







 







>

>
>
>
>
>
>
>
|







305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
....
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
....
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
	 * start of the string unless the previous character is a newline.
	 */

	if (offset == 0) {
	    eflags = 0;
	} else if (offset > stringLength) {
	    eflags = TCL_REG_NOTBOL;
	} else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') {
	    eflags = 0;
	} else {
	    eflags = TCL_REG_NOTBOL;
	}

	match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
		numMatchesSaved, eflags);
................................................................................
	 * megabyte range!) - DKF
	 */

	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);

	for ( ; stringPtr < end; stringPtr += len) {
	    len = TclUtfToUniChar(stringPtr, &ch);

#if TCL_UTF_MAX == 4
	    if (!len) {
		continue;
	    }
#endif

	    /*
	     * Assume Tcl_UniChar is an integral type...
	     */

	    hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR((int) ch),
		    &isNew);
................................................................................
	    if (strict) {
		result = 0;
	    }
	    goto str_is_done;
	}
	end = string1 + length1;
	for (; string1 < end; string1 += length2, failat++) {
	    int fullchar;
	    length2 = TclUtfToUniChar(string1, &ch);
	    fullchar = ch;
#if TCL_UTF_MAX == 4
	    if (!length2) {
	    	length2 = TclUtfToUniChar(string1, &ch);
	    	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	    }
#endif
	    if (!chcomp(fullchar)) {
		result = 0;
		break;
	    }
	}
    }

    /*

Changes to generic/tclDecls.h.

955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
....
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
....
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
....
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
				int flags);
/* 318 */
EXTERN void		Tcl_ThreadAlert(Tcl_ThreadId threadId);
/* 319 */
EXTERN void		Tcl_ThreadQueueEvent(Tcl_ThreadId threadId,
				Tcl_Event *evPtr, Tcl_QueuePosition position);
/* 320 */
EXTERN Tcl_UniChar	Tcl_UniCharAtIndex(const char *src, int index);
/* 321 */
EXTERN Tcl_UniChar	Tcl_UniCharToLower(int ch);
/* 322 */
EXTERN Tcl_UniChar	Tcl_UniCharToTitle(int ch);
/* 323 */
EXTERN Tcl_UniChar	Tcl_UniCharToUpper(int ch);
/* 324 */
EXTERN int		Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN CONST84_RETURN char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
EXTERN int		Tcl_UtfCharComplete(const char *src, int length);
/* 327 */
................................................................................
				int numChars);
/* 379 */
EXTERN void		Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int numChars);
/* 380 */
EXTERN int		Tcl_GetCharLength(Tcl_Obj *objPtr);
/* 381 */
EXTERN Tcl_UniChar	Tcl_GetUniChar(Tcl_Obj *objPtr, int index);
/* 382 */
EXTERN Tcl_UniChar *	Tcl_GetUnicode(Tcl_Obj *objPtr);
/* 383 */
EXTERN Tcl_Obj *	Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
EXTERN void		Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int length);
................................................................................
    int (*tcl_ReadChars) (Tcl_Channel channel, Tcl_Obj *objPtr, int charsToRead, int appendFlag); /* 313 */
    void (*tcl_RestoreResult) (Tcl_Interp *interp, Tcl_SavedResult *statePtr); /* 314 */
    void (*tcl_SaveResult) (Tcl_Interp *interp, Tcl_SavedResult *statePtr); /* 315 */
    int (*tcl_SetSystemEncoding) (Tcl_Interp *interp, const char *name); /* 316 */
    Tcl_Obj * (*tcl_SetVar2Ex) (Tcl_Interp *interp, const char *part1, const char *part2, Tcl_Obj *newValuePtr, int flags); /* 317 */
    void (*tcl_ThreadAlert) (Tcl_ThreadId threadId); /* 318 */
    void (*tcl_ThreadQueueEvent) (Tcl_ThreadId threadId, Tcl_Event *evPtr, Tcl_QueuePosition position); /* 319 */
    Tcl_UniChar (*tcl_UniCharAtIndex) (const char *src, int index); /* 320 */
    Tcl_UniChar (*tcl_UniCharToLower) (int ch); /* 321 */
    Tcl_UniChar (*tcl_UniCharToTitle) (int ch); /* 322 */
    Tcl_UniChar (*tcl_UniCharToUpper) (int ch); /* 323 */
    int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
    CONST84_RETURN char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
    int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
    int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
    CONST84_RETURN char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
    CONST84_RETURN char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
    CONST84_RETURN char * (*tcl_UtfNext) (const char *src); /* 330 */
................................................................................
    int (*tcl_UniCharIsPrint) (int ch); /* 374 */
    int (*tcl_UniCharIsPunct) (int ch); /* 375 */
    int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */
    void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
    Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
    void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
    int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
    Tcl_UniChar (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
    Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
    Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
    void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
    int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
    void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */
    Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
    int (*tcl_GetChannelNames) (Tcl_Interp *interp); /* 388 */






|

|

|

|







 







|







 







|
|
|
|







 







|







955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
....
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
....
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
....
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
				int flags);
/* 318 */
EXTERN void		Tcl_ThreadAlert(Tcl_ThreadId threadId);
/* 319 */
EXTERN void		Tcl_ThreadQueueEvent(Tcl_ThreadId threadId,
				Tcl_Event *evPtr, Tcl_QueuePosition position);
/* 320 */
EXTERN int		Tcl_UniCharAtIndex(const char *src, int index);
/* 321 */
EXTERN int		Tcl_UniCharToLower(int ch);
/* 322 */
EXTERN int		Tcl_UniCharToTitle(int ch);
/* 323 */
EXTERN int		Tcl_UniCharToUpper(int ch);
/* 324 */
EXTERN int		Tcl_UniCharToUtf(int ch, char *buf);
/* 325 */
EXTERN CONST84_RETURN char * Tcl_UtfAtIndex(const char *src, int index);
/* 326 */
EXTERN int		Tcl_UtfCharComplete(const char *src, int length);
/* 327 */
................................................................................
				int numChars);
/* 379 */
EXTERN void		Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int numChars);
/* 380 */
EXTERN int		Tcl_GetCharLength(Tcl_Obj *objPtr);
/* 381 */
EXTERN int		Tcl_GetUniChar(Tcl_Obj *objPtr, int index);
/* 382 */
EXTERN Tcl_UniChar *	Tcl_GetUnicode(Tcl_Obj *objPtr);
/* 383 */
EXTERN Tcl_Obj *	Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
EXTERN void		Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int length);
................................................................................
    int (*tcl_ReadChars) (Tcl_Channel channel, Tcl_Obj *objPtr, int charsToRead, int appendFlag); /* 313 */
    void (*tcl_RestoreResult) (Tcl_Interp *interp, Tcl_SavedResult *statePtr); /* 314 */
    void (*tcl_SaveResult) (Tcl_Interp *interp, Tcl_SavedResult *statePtr); /* 315 */
    int (*tcl_SetSystemEncoding) (Tcl_Interp *interp, const char *name); /* 316 */
    Tcl_Obj * (*tcl_SetVar2Ex) (Tcl_Interp *interp, const char *part1, const char *part2, Tcl_Obj *newValuePtr, int flags); /* 317 */
    void (*tcl_ThreadAlert) (Tcl_ThreadId threadId); /* 318 */
    void (*tcl_ThreadQueueEvent) (Tcl_ThreadId threadId, Tcl_Event *evPtr, Tcl_QueuePosition position); /* 319 */
    int (*tcl_UniCharAtIndex) (const char *src, int index); /* 320 */
    int (*tcl_UniCharToLower) (int ch); /* 321 */
    int (*tcl_UniCharToTitle) (int ch); /* 322 */
    int (*tcl_UniCharToUpper) (int ch); /* 323 */
    int (*tcl_UniCharToUtf) (int ch, char *buf); /* 324 */
    CONST84_RETURN char * (*tcl_UtfAtIndex) (const char *src, int index); /* 325 */
    int (*tcl_UtfCharComplete) (const char *src, int length); /* 326 */
    int (*tcl_UtfBackslash) (const char *src, int *readPtr, char *dst); /* 327 */
    CONST84_RETURN char * (*tcl_UtfFindFirst) (const char *src, int ch); /* 328 */
    CONST84_RETURN char * (*tcl_UtfFindLast) (const char *src, int ch); /* 329 */
    CONST84_RETURN char * (*tcl_UtfNext) (const char *src); /* 330 */
................................................................................
    int (*tcl_UniCharIsPrint) (int ch); /* 374 */
    int (*tcl_UniCharIsPunct) (int ch); /* 375 */
    int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */
    void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
    Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
    void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
    int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
    int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
    Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
    Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
    void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
    int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
    void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */
    Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
    int (*tcl_GetChannelNames) (Tcl_Interp *interp); /* 388 */

Changes to generic/tclScan.c.

881
882
883
884
885
886
887
888








889
890
891
892
893
894
895
896
897
	    break;
	}
	case 'c':
	    /*
	     * Scan a single Unicode character.
	     */

	    string += TclUtfToUniChar(string, &sch);








	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewIntObj((int)sch);
		Tcl_IncrRefCount(objPtr);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
	    }
	    break;

	case 'i':






|
>
>
>
>
>
>
>
>

|







881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
	    break;
	}
	case 'c':
	    /*
	     * Scan a single Unicode character.
	     */

	    offset = TclUtfToUniChar(string, &sch);
	    i = (int)sch;
#if TCL_UTF_MAX == 4
	    if (!offset) {
		offset = Tcl_UtfToUniChar(string, &sch);
		i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
	    }
#endif
	    string += offset;
	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewIntObj(i);
		Tcl_IncrRefCount(objPtr);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
	    }
	    break;

	case 'i':

Changes to generic/tclStringObj.c.

462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
...
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
....
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
....
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
....
3584
3585
3586
3587
3588
3589
3590
3591

3592
3593
3594
3595
3596
3597
3598
 *
 * Side effects:
 *	Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_GetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    int index)			/* Get the index'th Unicode character. */
{
    String *stringPtr;

................................................................................
     * without string representation; we don't need to convert to a string to
     * perform the indexing operation.
     */

    if (TclIsPureByteArray(objPtr)) {
	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);

	return (Tcl_UniChar) bytes[index];
    }

    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
................................................................................
	}
	if (stringPtr->numChars == objPtr->length) {
	    return (Tcl_UniChar) objPtr->bytes[index];
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }
    return stringPtr->unicode[index];
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUnicode --
 *
................................................................................
	    Tcl_UniChar *to;

	    /*
	     * Create a non-empty, pure unicode value, so we can coax
	     * Tcl_SetObjLength into growing the unicode rep buffer.
	     */

	    ch = 0;
	    objPtr = Tcl_NewUnicodeObj(&ch, 1);
	    Tcl_SetObjLength(objPtr, stringPtr->numChars);
	    to = Tcl_GetUnicode(objPtr);
	    while (--src >= from) {
		*to++ = *src;
	    }
	} else {
................................................................................
    Tcl_Obj *objPtr,
    const char *bytes,
    int numBytes,
    int numAppendChars)
{
    String *stringPtr = GET_STRING(objPtr);
    int needed, numOrigChars = 0;
    Tcl_UniChar *dst;

    if (stringPtr->hasUnicode) {
	numOrigChars = stringPtr->numChars;
    }
    if (numAppendChars == -1) {
	TclNumUtfChars(numAppendChars, bytes, numBytes);
    }
................................................................................
    stringPtr->hasUnicode = 1;
    if (bytes) {
	stringPtr->numChars = needed;
    } else {
	numAppendChars = 0;
    }
    for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
	bytes += TclUtfToUniChar(bytes, dst);

    }
    *dst = 0;
}
 
/*
 *----------------------------------------------------------------------
 *






|







 







|







 







|







 







<







 







|







 







|
>







462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
...
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
....
3458
3459
3460
3461
3462
3463
3464

3465
3466
3467
3468
3469
3470
3471
....
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
....
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
 *
 * Side effects:
 *	Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    int index)			/* Get the index'th Unicode character. */
{
    String *stringPtr;

................................................................................
     * without string representation; we don't need to convert to a string to
     * perform the indexing operation.
     */

    if (TclIsPureByteArray(objPtr)) {
	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);

	return (int) bytes[index];
    }

    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
................................................................................
	}
	if (stringPtr->numChars == objPtr->length) {
	    return (Tcl_UniChar) objPtr->bytes[index];
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }
    return (int) stringPtr->unicode[index];
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUnicode --
 *
................................................................................
	    Tcl_UniChar *to;

	    /*
	     * Create a non-empty, pure unicode value, so we can coax
	     * Tcl_SetObjLength into growing the unicode rep buffer.
	     */


	    objPtr = Tcl_NewUnicodeObj(&ch, 1);
	    Tcl_SetObjLength(objPtr, stringPtr->numChars);
	    to = Tcl_GetUnicode(objPtr);
	    while (--src >= from) {
		*to++ = *src;
	    }
	} else {
................................................................................
    Tcl_Obj *objPtr,
    const char *bytes,
    int numBytes,
    int numAppendChars)
{
    String *stringPtr = GET_STRING(objPtr);
    int needed, numOrigChars = 0;
    Tcl_UniChar *dst, unichar = 0;

    if (stringPtr->hasUnicode) {
	numOrigChars = stringPtr->numChars;
    }
    if (numAppendChars == -1) {
	TclNumUtfChars(numAppendChars, bytes, numBytes);
    }
................................................................................
    stringPtr->hasUnicode = 1;
    if (bytes) {
	stringPtr->numChars = needed;
    } else {
	numAppendChars = 0;
    }
    for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
	bytes += TclUtfToUniChar(bytes, &unichar);
	*dst = unichar;
    }
    *dst = 0;
}
 
/*
 *----------------------------------------------------------------------
 *

Changes to generic/tclUtf.c.

695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
...
815
816
817
818
819
820
821
822

823
824
825
826
827
828
829
830
831
832







833
834
835
836
837
838
839
840
...
868
869
870
871
872
873
874
875

876
877
878
879
880
881
882
883
884
885







886
887
888
889
890
891
892
893
...
922
923
924
925
926
927
928

929
930
931
932
933
934
935
936
937
938
939
940
941







942
943
944
945
946
947
948
949
950
951
952
953







954
955
956
957
958
959
960
961
....
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165

1166
1167
1168
1169
1170
1171


1172
1173
1174
1175
1176
1177
1178
....
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193

1194
1195
1196
1197
1198
1199


1200
1201
1202
1203
1204
1205
1206
....
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221

1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234


1235
1236
1237
1238
1239
1240
1241
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_UniCharAtIndex(
    register const char *src,	/* The UTF-8 string to dereference. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;

    while (index >= 0) {
................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToUpper(
    char *str)			/* String to convert in place. */
{
    Tcl_UniChar ch = 0, upChar;

    char *src, *dst;
    int bytes;

    /*
     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);







	upChar = Tcl_UniCharToUpper(ch);

	/*
	 * To keep badly formed Utf strings from getting inflated by the
	 * conversion (thereby causing a segfault), only copy the upper case
	 * char to dst if its size is <= the original char.
	 */

................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToLower(
    char *str)			/* String to convert in place. */
{
    Tcl_UniChar ch = 0, lowChar;

    char *src, *dst;
    int bytes;

    /*
     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);







	lowChar = Tcl_UniCharToLower(ch);

	/*
	 * To keep badly formed Utf strings from getting inflated by the
	 * conversion (thereby causing a segfault), only copy the lower case
	 * char to dst if its size is <= the original char.
	 */

................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToTitle(
    char *str)			/* String to convert in place. */
{

    Tcl_UniChar ch = 0, titleChar, lowChar;
    char *src, *dst;
    int bytes;

    /*
     * Capitalize the first character and then lowercase the rest of the
     * characters until we get to a null.
     */

    src = dst = str;

    if (*src) {
	bytes = TclUtfToUniChar(src, &ch);







	titleChar = Tcl_UniCharToTitle(ch);

	if (bytes < TclUtfCount(titleChar)) {
	    memcpy(dst, src, (size_t) bytes);
	    dst += bytes;
	} else {
	    dst += Tcl_UniCharToUtf(titleChar, dst);
	}
	src += bytes;
    }
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);







	lowChar = Tcl_UniCharToLower(ch);

	if (bytes < TclUtfCount(lowChar)) {
	    memcpy(dst, src, (size_t) bytes);
	    dst += bytes;
	} else {
	    dst += Tcl_UniCharToUtf(lowChar, dst);
	}
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_UniCharToUpper(
    int ch)			/* Unicode character to convert. */
{

    int info = GetUniCharInfo(ch);

    if (GetCaseType(info) & 0x04) {
	ch -= GetDelta(info);
    }
    return (Tcl_UniChar) ch;


}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToLower --
 *
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_UniCharToLower(
    int ch)			/* Unicode character to convert. */
{

    int info = GetUniCharInfo(ch);

    if (GetCaseType(info) & 0x02) {
	ch += GetDelta(info);
    }
    return (Tcl_UniChar) ch;


}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToTitle --
 *
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_UniCharToTitle(
    int ch)			/* Unicode character to convert. */
{

    int info = GetUniCharInfo(ch);
    int mode = GetCaseType(info);

    if (mode & 0x1) {
	/*
	 * Subtract or add one depending on the original case.
	 */

	ch += ((mode & 0x4) ? -1 : 1);
    } else if (mode == 0x4) {
	ch -= GetDelta(info);
    }
    return (Tcl_UniChar) ch;


}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharLen --
 *






|







 







|
>










>
>
>
>
>
>
>
|







 







|
>










>
>
>
>
>
>
>
|







 







>
|












>
>
>
>
>
>
>
|











>
>
>
>
>
>
>
|







 







|



>
|

|
|
|
<
>
>







 







|



>
|

|
|
|
<
>
>







 







|



>
|
|

|
|
|
|

|
|
|
|
<
>
>







695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
...
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
...
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
...
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
....
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202

1203
1204
1205
1206
1207
1208
1209
1210
1211
....
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232

1233
1234
1235
1236
1237
1238
1239
1240
1241
....
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269

1270
1271
1272
1273
1274
1275
1276
1277
1278
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

int
Tcl_UniCharAtIndex(
    register const char *src,	/* The UTF-8 string to dereference. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;

    while (index >= 0) {
................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToUpper(
    char *str)			/* String to convert in place. */
{
    Tcl_UniChar ch = 0;
    int upChar;
    char *src, *dst;
    int bytes;

    /*
     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);
	upChar = ch;
	if (!bytes) {
	    /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
	    bytes = TclUtfToUniChar(src, &ch);
	    /* Combine surrogates */
	    upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
	upChar = Tcl_UniCharToUpper(upChar);

	/*
	 * To keep badly formed Utf strings from getting inflated by the
	 * conversion (thereby causing a segfault), only copy the upper case
	 * char to dst if its size is <= the original char.
	 */

................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToLower(
    char *str)			/* String to convert in place. */
{
    Tcl_UniChar ch = 0;
    int lowChar;
    char *src, *dst;
    int bytes;

    /*
     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);
	lowChar = ch;
	if (!bytes) {
	    /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
	    bytes = TclUtfToUniChar(src, &ch);
	    /* Combine surrogates */
	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
	lowChar = Tcl_UniCharToLower(lowChar);

	/*
	 * To keep badly formed Utf strings from getting inflated by the
	 * conversion (thereby causing a segfault), only copy the lower case
	 * char to dst if its size is <= the original char.
	 */

................................................................................
 *----------------------------------------------------------------------
 */

int
Tcl_UtfToTitle(
    char *str)			/* String to convert in place. */
{
    Tcl_UniChar ch = 0;
    int titleChar, lowChar;
    char *src, *dst;
    int bytes;

    /*
     * Capitalize the first character and then lowercase the rest of the
     * characters until we get to a null.
     */

    src = dst = str;

    if (*src) {
	bytes = TclUtfToUniChar(src, &ch);
	titleChar = ch;
	if (!bytes) {
	    /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
	    bytes = TclUtfToUniChar(src, &ch);
	    /* Combine surrogates */
	    titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
	titleChar = Tcl_UniCharToTitle(titleChar);

	if (bytes < TclUtfCount(titleChar)) {
	    memcpy(dst, src, (size_t) bytes);
	    dst += bytes;
	} else {
	    dst += Tcl_UniCharToUtf(titleChar, dst);
	}
	src += bytes;
    }
    while (*src) {
	bytes = TclUtfToUniChar(src, &ch);
	lowChar = ch;
	if (!bytes) {
	    /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
	    bytes = TclUtfToUniChar(src, &ch);
	    /* Combine surrogates */
	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
	lowChar = Tcl_UniCharToLower(lowChar);

	if (bytes < TclUtfCount(lowChar)) {
	    memcpy(dst, src, (size_t) bytes);
	    dst += bytes;
	} else {
	    dst += Tcl_UniCharToUtf(lowChar, dst);
	}
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharToUpper(
    int ch)			/* Unicode character to convert. */
{
    if (!UNICODE_OUT_OF_RANGE(ch)) {
	int info = GetUniCharInfo(ch);

	if (GetCaseType(info) & 0x04) {
	    ch -= GetDelta(info);
	}

    }
    return ch & 0x1FFFFF;
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToLower --
 *
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharToLower(
    int ch)			/* Unicode character to convert. */
{
    if (!UNICODE_OUT_OF_RANGE(ch)) {
	int info = GetUniCharInfo(ch);

	if (GetCaseType(info) & 0x02) {
	    ch += GetDelta(info);
	}

    }
    return ch & 0x1FFFFF;
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToTitle --
 *
................................................................................
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharToTitle(
    int ch)			/* Unicode character to convert. */
{
    if (!UNICODE_OUT_OF_RANGE(ch)) {
	int info = GetUniCharInfo(ch);
	int mode = GetCaseType(info);

	if (mode & 0x1) {
	    /*
	     * Subtract or add one depending on the original case.
	     */

	    ch += ((mode & 0x4) ? -1 : 1);
	} else if (mode == 0x4) {
	    ch -= GetDelta(info);
	}

    }
    return ch & 0x1FFFFF;
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharLen --
 *

Changes to tests/string.test.

1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
} edcba
test string-24.4 {string reverse command - unshared string} {
    set x abc
    set y de
    string reverse $x$y
} edcba
test string-24.5 {string reverse command - shared unicode string} {
    set x abcde\udead
    string reverse $x
} \udeadedcba
test string-24.6 {string reverse command - unshared string} {
    set x abc
    set y de\udead
    string reverse $x$y
} \udeadedcba
test string-24.7 {string reverse command - simple case} {
    string reverse a
} a
test string-24.8 {string reverse command - simple case} {
    string reverse \udead
} \udead
test string-24.9 {string reverse command - simple case} {
    string reverse {}
} {}
test string-24.10 {string reverse command - corner case} {
    set x \ubeef\udead
    string reverse $x
} \udead\ubeef
test string-24.11 {string reverse command - corner case} {
    set x \ubeef
    set y \udead
    string reverse $x$y
} \udead\ubeef
test string-24.12 {string reverse command - corner case} {
    set x \ubeef
    set y \udead
    string is ascii [string reverse $x$y]
} 0
test string-24.13 {string reverse command - pure Unicode string} {
    string reverse [string range \ubeef\udead\ubeef\udead\ubeef\udead 1 5]
} \udead\ubeef\udead\ubeef\udead
test string-24.14 {string reverse command - pure bytearray} {
    binary scan [string reverse [binary format H* 010203]] H* x
    set x
} 030201
test string-24.15 {string reverse command - pure bytearray} {
    binary scan [tcl::string::reverse [binary format H* 010203]] H* x
    set x






|

|


|

|




|
|




|

|


|

|


|



|
|







1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
} edcba
test string-24.4 {string reverse command - unshared string} {
    set x abc
    set y de
    string reverse $x$y
} edcba
test string-24.5 {string reverse command - shared unicode string} {
    set x abcde\ud0ad
    string reverse $x
} \ud0adedcba
test string-24.6 {string reverse command - unshared string} {
    set x abc
    set y de\ud0ad
    string reverse $x$y
} \ud0adedcba
test string-24.7 {string reverse command - simple case} {
    string reverse a
} a
test string-24.8 {string reverse command - simple case} {
    string reverse \ud0ad
} \ud0ad
test string-24.9 {string reverse command - simple case} {
    string reverse {}
} {}
test string-24.10 {string reverse command - corner case} {
    set x \ubeef\ud0ad
    string reverse $x
} \ud0ad\ubeef
test string-24.11 {string reverse command - corner case} {
    set x \ubeef
    set y \ud0ad
    string reverse $x$y
} \ud0ad\ubeef
test string-24.12 {string reverse command - corner case} {
    set x \ubeef
    set y \ud0ad
    string is ascii [string reverse $x$y]
} 0
test string-24.13 {string reverse command - pure Unicode string} {
    string reverse [string range \ubeef\ud0ad\ubeef\ud0ad\ubeef\ud0ad 1 5]
} \ud0ad\ubeef\ud0ad\ubeef\ud0ad
test string-24.14 {string reverse command - pure bytearray} {
    binary scan [string reverse [binary format H* 010203]] H* x
    set x
} 030201
test string-24.15 {string reverse command - pure bytearray} {
    binary scan [tcl::string::reverse [binary format H* 010203]] H* x
    set x

Changes to tests/utf.test.

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    string length [testbytestring "\xE2\xA2"]
} {2}
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
    string length [testbytestring "\xE4\xb9\x8e"]
} {1}
test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
    string length [testbytestring "\xF0\x90\x80\x80"]
} -result {1}
test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
    string length [testbytestring "\xF4\x8F\xBF\xBF"]
} -result {1}
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
    string length [testbytestring "\xF0\x8F\xBF\xBF"]
} {4}
test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
    string length [testbytestring "\xF4\x90\x80\x80"]
} {4}
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {






|


|







64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    string length [testbytestring "\xE2\xA2"]
} {2}
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} testbytestring {
    string length [testbytestring "\xE4\xb9\x8e"]
} {1}
test utf-2.8 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
    string length [testbytestring "\xF0\x90\x80\x80"]
} -result {2}
test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints {fullutf testbytestring} -body {
    string length [testbytestring "\xF4\x8F\xBF\xBF"]
} -result {2}
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
    string length [testbytestring "\xF0\x8F\xBF\xBF"]
} {4}
test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
    string length [testbytestring "\xF4\x90\x80\x80"]
} {4}
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {