Tcl Source Code

Changes On Branch utf-max
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch utf-max Excluding Merge-Ins

This is equivalent to a diff from f0782b57a6 to f4cea11d2e

2020-01-20
12:18
Implementation of TIP #542: Support for switchable Full Unicode support check-in: 73c2397d70 user: jan.nijtmans tags: core-8-branch
2020-01-17
17:15
merge 8.6 check-in: b061abfc0c user: sebres tags: core-8-branch
15:37
merge 8.7 Closed-Leaf check-in: f4cea11d2e user: dgp tags: utf-max
13:57
merge 8.7 check-in: 3bc6633fc4 user: dgp tags: dgp-review
10:43
Merge 8.7, more WIP check-in: e5a4139844 user: jan.nijtmans tags: no-wur
08:54
Merge 8.6 check-in: f0782b57a6 user: jan.nijtmans tags: core-8-branch
08:53
Fix [8cd2fe7474]: "unload" command Bug (when handling multiple libs) check-in: 29086fe087 user: jan.nijtmans tags: core-8-6-branch
2020-01-15
10:21
Merge-mark 8.6. Remove a few "break"'s added in the previous commit, because they are redundant. check-in: 9a826dfb28 user: jan.nijtmans tags: core-8-branch
2020-01-08
23:00
merge 8.7 check-in: 7c9f4a0cee user: dgp tags: utf-max

Changes to .travis.yml.

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19

20
21
22
23
24
25
26
27
28
29
30
31
32
33
1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18

19







20
21
22
23
24
25
26












-
+





-
+
-
-
-
-
-
-
-







sudo: false
language: c

matrix:
  include:
# Testing on Linux with various compilers
    - name: "Linux/GCC/Shared"
      os: linux
      dist: xenial
      compiler: gcc
      env:
        - BUILD_DIR=unix
    - name: "Linux/GCC/Shared: UTF_MAX=6"
    - name: "Linux/GCC/Shared: UTF_MAX=4"
      os: linux
      dist: xenial
      compiler: gcc
      env:
        - BUILD_DIR=unix
        - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6
        - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4
    - name: "Linux/GCC/Shared: UTF_MAX=3"
      os: linux
      dist: xenial
      compiler: gcc
      env:
        - BUILD_DIR=unix
        - CFGOPT=CFLAGS=-DTCL_UTF_MAX=3
    - name: "Linux/GCC/Shared: NO_DEPRECATED"
      os: linux
      dist: xenial
      compiler: gcc
      env:
        - BUILD_DIR=unix
        - CFGOPT="CFLAGS=-DTCL_NO_DEPRECATED=1"
224
225
226
227
228
229
230
231

232
233
234
235
236
237
238
217
218
219
220
221
222
223

224
225
226
227
228
229
230
231







-
+







      before_install: &vcpreinst
        - PATH="$PATH:$VCDIR"
        - cd ${BUILD_DIR}
      install: []
      script:
        - cmd.exe /C 'vcvarsall.bat x64 && nmake -f makefile.vc all tcltest'
        - cmd.exe /C 'vcvarsall.bat x64 && nmake -f makefile.vc test'
    - name: "Windows/MSVC/Shared: UTF_MAX=6"
    - name: "Windows/MSVC/Shared: UTF_MAX=4"
      os: windows
      compiler: cl
      env: *vcenv
      before_install: *vcpreinst
      install: []
      script:
        - cmd.exe /C 'vcvarsall.bat x64 && nmake OPTS=utfmax -f makefile.vc all tcltest'
270
271
272
273
274
275
276
277

278
279
280
281
282
283
284
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277







-
+







      compiler: cl
      env: *vcenv
      before_install: *vcpreinst
      install: []
      script:
        - cmd.exe /C 'vcvarsall.bat x86 && nmake -f makefile.vc all tcltest'
        - cmd.exe /C 'vcvarsall.bat x86 && nmake -f makefile.vc test'
    - name: "Windows/MSVC-x86/Shared: UTF_MAX=6"
    - name: "Windows/MSVC-x86/Shared: UTF_MAX=4"
      os: windows
      compiler: cl
      env: *vcenv
      before_install: *vcpreinst
      install: []
      script:
        - cmd.exe /C 'vcvarsall.bat x86 && nmake OPTS=utfmax -f makefile.vc all tcltest'
316
317
318
319
320
321
322
323

324
325
326
327
328

329
330
331
332
333
334
335
336
337
338
339
340
341
342
309
310
311
312
313
314
315

316
317
318
319
320

321







322
323
324
325
326
327
328







-
+




-
+
-
-
-
-
-
-
-







      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="--enable-64bit"
      before_install: &makepreinst
        - choco install -y make zip
        - cd ${BUILD_DIR}
    - name: "Windows/GCC/Shared: UTF_MAX=6"
    - name: "Windows/GCC/Shared: UTF_MAX=4"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=6"
        - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=4"
      before_install: *makepreinst
    - name: "Windows/GCC/Shared: UTF_MAX=3"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="--enable-64bit CFLAGS=-DTCL_UTF_MAX=3"
      before_install: *makepreinst
    - name: "Windows/GCC/Shared: NO_DEPRECATED"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="--enable-64bit CFLAGS=-DTCL_NO_DEPRECATED=1"
358
359
360
361
362
363
364
365

366
367
368
369
370

371
372
373
374
375
376
377
378
379
380
381
382
383
384
344
345
346
347
348
349
350

351
352
353
354
355

356







357
358
359
360
361
362
363







-
+




-
+
-
-
-
-
-
-
-







# Test on Windows with GCC native (32-bit)
    - name: "Windows/GCC-x86/Shared"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
      before_install: *makepreinst
    - name: "Windows/GCC-x86/Shared: UTF_MAX=6"
    - name: "Windows/GCC-x86/Shared: UTF_MAX=4"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="CFLAGS=-DTCL_UTF_MAX=6"
        - CFGOPT="CFLAGS=-DTCL_UTF_MAX=4"
      before_install: *makepreinst
    - name: "Windows/GCC-x86/Shared: UTF_MAX=3"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="CFLAGS=-DTCL_UTF_MAX=3"
      before_install: *makepreinst
    - name: "Windows/GCC-x86/Shared: NO_DEPRECATED"
      os: windows
      compiler: gcc
      env:
        - BUILD_DIR=win
        - CFGOPT="CFLAGS=-DTCL_NO_DEPRECATED=1"

Changes to doc/Utf.3.

1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18










-
+







'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_WCharToUtfDString, Tcl_UtfToWCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
typedef ... \fBTcl_UniChar\fR;
.sp
int
116
117
118
119
120
121
122






123
124
125
126
127
128
129
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135







+
+
+
+
+
+







A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
.AP "const unsigned short" *uStr in
A null-terminated UTF-16 string.
.AP "const wchar_t" *wStr in
A null-terminated wchar_t string.
.AP "const unsigned short" *utf16s in
A null-terminated UTF-16 string.
.AP "const unsigned short" *utf16t in
A null-terminated UTF-16 string.
.AP "const unsigned short" *utf16Pattern in
A null-terminated UTF-16 string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters).  If
negative, all bytes up to the first null byte are used.
.AP int uniLength in
The length of the Unicode string in characters.
.AP "Tcl_DString" *dsPtr in/out
A pointer to a previously initialized \fBTcl_DString\fR.
144
145
146
147
148
149
150
151

152
153
154
155


156
157
158
159
160
161
162
150
151
152
153
154
155
156

157
158
159


160
161
162
163
164
165
166
167
168







-
+


-
-
+
+







case-insensitive (1).
.BE

.SH DESCRIPTION
.PP
These routines convert between UTF-8 strings and Unicode/UTF-16 characters.
A UTF-8 character is a Unicode character represented as a varying-length
sequence of up to \fBTCL_UTF_MAX\fR bytes.  A multibyte UTF-8 sequence
sequence of up to \fB4\fR bytes.  A multibyte UTF-8 sequence
consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
\fBTCL_UTF_MAX\fR is the maximum number of bytes that \fBTcl_UtfToUniChar\fR
can consume in a single call.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in memory starting at \fIbuf\fR.  The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
the return value will be 1 and a single byte in the range 0xF0 - 0xF4
will be stored. If you still want to produce UTF-8 output for it (even
though knowing it's an illegal code-point on its own), just call

Changes to generic/regcustom.h.

84
85
86
87
88
89
90
91

92
93
94
95
96
97
98
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98







-
+







typedef Tcl_UniChar chr;	/* The type itself. */
typedef int pchr;		/* What it promotes to. */
typedef unsigned uchr;		/* Unsigned type that will hold a chr. */
typedef int celt;		/* Type to hold chr, or NOCELT */
#define	NOCELT (-1)		/* Celt value which is not valid chr */
#define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
#define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
#define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
#define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
#define	CHR_MAX	0x10ffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
#else
#define	CHRBITS	16		/* Bits in a chr; must not use sizeof */
#define	CHR_MIN	0x0000		/* Smallest and largest chr; the value */
#define	CHR_MAX	0xffff		/* CHR_MAX-CHR_MIN+1 should fit in uchr */

Changes to generic/tcl.decls.

1240
1241
1242
1243
1244
1245
1246
1247

1248
1249
1250

1251
1252
1253
1254
1255
1256
1257
1240
1241
1242
1243
1244
1245
1246

1247
1248
1249

1250
1251
1252
1253
1254
1255
1256
1257







-
+


-
+







}
declare 350 {
    int Tcl_UniCharIsUpper(int ch)
}
declare 351 {
    int Tcl_UniCharIsWordChar(int ch)
}
declare 352 {
declare 352 {deprecated {Use Tcl_GetCharLength}} {
    int Tcl_UniCharLen(const Tcl_UniChar *uniStr)
}
declare 353 {
declare 353 {deprecated {Use Tcl_UtfNcmp}} {
    int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
	    unsigned long numChars)
}
declare 354 {
    char *Tcl_Char16ToUtfDString(const unsigned short *uniStr,
	    int uniLength, Tcl_DString *dsPtr)
}
1333
1334
1335
1336
1337
1338
1339
1340

1341
1342
1343

1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359

1360
1361
1362
1363
1364
1365
1366
1333
1334
1335
1336
1337
1338
1339

1340
1341
1342

1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358

1359
1360
1361
1362
1363
1364
1365
1366







-
+


-
+















-
+







declare 376 {
    int Tcl_RegExpExecObj(Tcl_Interp *interp, Tcl_RegExp regexp,
	    Tcl_Obj *textObj, int offset, int nmatches, int flags)
}
declare 377 {
    void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
}
declare 378 {
declare 378 {deprecated {Use Tcl_UniCharToUtfDString}} {
    Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars)
}
declare 379 {
declare 379 {deprecated {Use Tcl_UniCharToUtfDString}} {
    void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
	    int numChars)
}
declare 380 {
    int Tcl_GetCharLength(Tcl_Obj *objPtr)
}
declare 381 {
    int Tcl_GetUniChar(Tcl_Obj *objPtr, int index)
}
declare 382 {deprecated {No longer in use, changed to macro}} {
    Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr)
}
declare 383 {
    Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last)
}
declare 384 {
declare 384 {deprecated {Use Tcl_AppendStringsToObj}} {
    void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode,
	    int length)
}
declare 385 {
    int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj,
	    Tcl_Obj *patternObj)
}
1478
1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489

1490
1491
1492
1493
1494
1495
1496
1478
1479
1480
1481
1482
1483
1484

1485
1486
1487
1488

1489
1490
1491
1492
1493
1494
1495
1496







-
+



-
+







}
declare 417 {
    void Tcl_ClearChannelHandlers(Tcl_Channel channel)
}
declare 418 {
    int Tcl_IsChannelExisting(const char *channelName)
}
declare 419 {
declare 419 {deprecated {Use Tcl_UtfNcasecmp}} {
    int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
	    unsigned long numChars)
}
declare 420 {
declare 420 {deprecated {Use Tcl_StringCaseMatch}} {
    int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
	    const Tcl_UniChar *uniPattern, int nocase)
}
declare 421 {
    Tcl_HashEntry *Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key)
}
declare 422 {
1536
1537
1538
1539
1540
1541
1542
1543

1544
1545
1546
1547
1548
1549
1550
1536
1537
1538
1539
1540
1541
1542

1543
1544
1545
1546
1547
1548
1549
1550







-
+








# TIP#10 (thread-aware channels) akupries
declare 433 {
    Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel)
}

# introduced in 8.4a3
declare 434 {
declare 434 {deprecated {Use Tcl_UtfToUniCharDString}} {
    Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}

# TIP#15 (math function introspection) dkf
declare 435 {deprecated {}} {
    int Tcl_GetMathFuncInfo(Tcl_Interp *interp, const char *name,
	    int *numArgsPtr, Tcl_ValueType **argTypesPtr,

Changes to generic/tcl.h.

2102
2103
2104
2105
2106
2107
2108
2109
2110
2111



2112
2113

2114
2115
2116
2117
2118

2119
2120
2121
2122
2123
2124
2125
2126

2127
2128
2129
2130


2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2102
2103
2104
2105
2106
2107
2108



2109
2110
2111
2112

2113

2114
2115
2116

2117
2118
2119
2120
2121
2122
2123
2124

2125
2126
2127


2128
2129




2130
2131
2132
2133
2134
2135
2136







-
-
-
+
+
+

-
+
-



-
+







-
+


-
-
+
+
-
-
-
-







#define TCL_CONVERT_MULTIBYTE	(-1)
#define TCL_CONVERT_SYNTAX	(-2)
#define TCL_CONVERT_UNKNOWN	(-3)
#define TCL_CONVERT_NOSPACE	(-4)

/*
 * The maximum number of bytes that are necessary to represent a single
 * Unicode character in UTF-8. The valid values are 4 and 6
 * (or perhaps 1 if we want to support a non-unicode enabled core). If 4,
 * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If 6,
 * Unicode character in UTF-8. The valid values are 3 and 4
 * (or perhaps 1 if we want to support a non-unicode enabled core). If 3,
 * then Tcl_UniChar must be 2-bytes in size (UCS-2) (the default). If > 3,
 * then Tcl_UniChar must be 4-bytes in size (UCS-4). At this time UCS-2 mode
 * is the default and recommended mode. UCS-4 is experimental and not
 * is the default and recommended mode.
 * recommended. It works for the core, but most extensions expect UCS-2.
 */

#ifndef TCL_UTF_MAX
#define TCL_UTF_MAX		4
#define TCL_UTF_MAX		3
#endif

/*
 * This represents a Unicode character. Any changes to this should also be
 * reflected in regcustom.h.
 */

#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
    /*
     * int isn't 100% accurate as it should be a strict 4-byte value
     * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
     * value must be reflected correctly in regcustom.h and
     * (perhaps wchar_t). ILP64/SILP64 systems may have troubles. The
     * size of this value must be reflected correctly in regcustom.h.
     * in tclEncoding.c.
     * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode
     * XXX: string rep that Tcl_UniChar represents.  Changing the size
     * XXX: of Tcl_UniChar is /not/ supported.
     */
typedef int Tcl_UniChar;
#else
typedef unsigned short Tcl_UniChar;
#endif

/*
2307
2308
2309
2310
2311
2312
2313
2314

2315
2316
2317

2318
2319
2320
2321
2322
2323
2324
2302
2303
2304
2305
2306
2307
2308

2309
2310
2311

2312
2313
2314
2315
2316
2317
2318
2319







-
+


-
+








typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp,
				int result);

/*
 *----------------------------------------------------------------------------
 * The following constant is used to test for older versions of Tcl in the
 * stubs tables. If TCL_UTF_MAX>4 use a different value.
 * stubs tables.
 */

#define TCL_STUB_MAGIC		((int) 0xFCA3BACF + (TCL_UTF_MAX>4))
#define TCL_STUB_MAGIC		((int) 0xFCA3BACF)

/*
 * The following function is required to be defined in all stubs aware
 * extensions. The function is actually implemented in the stub library, not
 * the main Tcl library, although there is a trivial implementation in the
 * main library in case an extension is statically linked into an application.
 */

Changes to generic/tclBinary.c.

1359
1360
1361
1362
1363
1364
1365
1366

1367
1368
1369
1370
1371
1372
1373
1359
1360
1361
1362
1363
1364
1365

1366
1367
1368
1369
1370
1371
1372
1373







-
+







 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";
	char buf[5] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }
1729
1730
1731
1732
1733
1734
1735
1736

1737
1738
1739
1740
1741
1742
1743
1729
1730
1731
1732
1733
1734
1735

1736
1737
1738
1739
1740
1741
1742
1743







-
+







 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";
	char buf[5] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }

Changes to generic/tclCmdMZ.c.

1217
1218
1219
1220
1221
1222
1223
1224

1225
1226
1227
1228
1229
1230
1231
1217
1218
1219
1220
1221
1222
1223

1224
1225
1226
1227
1228
1229
1230
1231







-
+







	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);

	for ( ; stringPtr < end; stringPtr += len) {
	    int fullchar;
	    len = TclUtfToUniChar(stringPtr, &ch);
	    fullchar = ch;

#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    if ((ch >= 0xD800) && (len < 3)) {
		len += TclUtfToUniChar(stringPtr + len, &ch);
		fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	    }
#endif

	    /*
1907
1908
1909
1910
1911
1912
1913
1914

1915
1916
1917
1918
1919
1920
1921
1907
1908
1909
1910
1911
1912
1913

1914
1915
1916
1917
1918
1919
1920
1921







-
+







	    goto str_is_done;
	}
	end = string1 + length1;
	for (; string1 < end; string1 += length2, failat++) {
	    int fullchar;
	    length2 = TclUtfToUniChar(string1, &ch);
	    fullchar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    if ((ch >= 0xD800) && (length2 < 3)) {
	    	length2 += TclUtfToUniChar(string1 + length2, &ch);
	    	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	    }
#endif
	    if (!chcomp(fullchar)) {
		result = 0;

Changes to generic/tclDecls.h.

1059
1060
1061
1062
1063
1064
1065

1066

1067

1068

1069
1070
1071
1072
1073
1074
1075
1059
1060
1061
1062
1063
1064
1065
1066

1067
1068
1069

1070
1071
1072
1073
1074
1075
1076
1077







+
-
+

+
-
+







/* 349 */
EXTERN int		Tcl_UniCharIsSpace(int ch);
/* 350 */
EXTERN int		Tcl_UniCharIsUpper(int ch);
/* 351 */
EXTERN int		Tcl_UniCharIsWordChar(int ch);
/* 352 */
TCL_DEPRECATED("Use Tcl_GetCharLength")
EXTERN int		Tcl_UniCharLen(const Tcl_UniChar *uniStr);
int			Tcl_UniCharLen(const Tcl_UniChar *uniStr);
/* 353 */
TCL_DEPRECATED("Use Tcl_UtfNcmp")
EXTERN int		Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
int			Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
				const Tcl_UniChar *uct,
				unsigned long numChars);
/* 354 */
EXTERN char *		Tcl_Char16ToUtfDString(const unsigned short *uniStr,
				int uniLength, Tcl_DString *dsPtr);
/* 355 */
EXTERN unsigned short *	 Tcl_UtfToChar16DString(const char *src, int length,
1137
1138
1139
1140
1141
1142
1143

1144

1145
1146

1147

1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158

1159

1160
1161
1162
1163
1164
1165
1166
1139
1140
1141
1142
1143
1144
1145
1146

1147
1148
1149
1150

1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163

1164
1165
1166
1167
1168
1169
1170
1171







+
-
+


+
-
+











+
-
+







EXTERN int		Tcl_RegExpExecObj(Tcl_Interp *interp,
				Tcl_RegExp regexp, Tcl_Obj *textObj,
				int offset, int nmatches, int flags);
/* 377 */
EXTERN void		Tcl_RegExpGetInfo(Tcl_RegExp regexp,
				Tcl_RegExpInfo *infoPtr);
/* 378 */
TCL_DEPRECATED("Use Tcl_UniCharToUtfDString")
EXTERN Tcl_Obj *	Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
Tcl_Obj *		Tcl_NewUnicodeObj(const Tcl_UniChar *unicode,
				int numChars);
/* 379 */
TCL_DEPRECATED("Use Tcl_UniCharToUtfDString")
EXTERN void		Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
void			Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int numChars);
/* 380 */
EXTERN int		Tcl_GetCharLength(Tcl_Obj *objPtr);
/* 381 */
EXTERN int		Tcl_GetUniChar(Tcl_Obj *objPtr, int index);
/* 382 */
TCL_DEPRECATED("No longer in use, changed to macro")
Tcl_UniChar *		Tcl_GetUnicode(Tcl_Obj *objPtr);
/* 383 */
EXTERN Tcl_Obj *	Tcl_GetRange(Tcl_Obj *objPtr, int first, int last);
/* 384 */
TCL_DEPRECATED("Use Tcl_AppendStringsToObj")
EXTERN void		Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
void			Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
				const Tcl_UniChar *unicode, int length);
/* 385 */
EXTERN int		Tcl_RegExpMatchObj(Tcl_Interp *interp,
				Tcl_Obj *textObj, Tcl_Obj *patternObj);
/* 386 */
EXTERN void		Tcl_SetNotifier(Tcl_NotifierProcs *notifierProcPtr);
/* 387 */
1246
1247
1248
1249
1250
1251
1252

1253

1254
1255
1256

1257

1258
1259
1260
1261
1262
1263
1264
1251
1252
1253
1254
1255
1256
1257
1258

1259
1260
1261
1262
1263

1264
1265
1266
1267
1268
1269
1270
1271







+
-
+



+
-
+







/* 416 */
EXTERN void		Tcl_SpliceChannel(Tcl_Channel channel);
/* 417 */
EXTERN void		Tcl_ClearChannelHandlers(Tcl_Channel channel);
/* 418 */
EXTERN int		Tcl_IsChannelExisting(const char *channelName);
/* 419 */
TCL_DEPRECATED("Use Tcl_UtfNcasecmp")
EXTERN int		Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs,
int			Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs,
				const Tcl_UniChar *uct,
				unsigned long numChars);
/* 420 */
TCL_DEPRECATED("Use Tcl_StringCaseMatch")
EXTERN int		Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
int			Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr,
				const Tcl_UniChar *uniPattern, int nocase);
/* 421 */
EXTERN Tcl_HashEntry *	Tcl_FindHashEntry(Tcl_HashTable *tablePtr,
				const void *key);
/* 422 */
EXTERN Tcl_HashEntry *	Tcl_CreateHashEntry(Tcl_HashTable *tablePtr,
				const void *key, int *newPtr);
1293
1294
1295
1296
1297
1298
1299

1300

1301
1302
1303
1304
1305
1306
1307
1300
1301
1302
1303
1304
1305
1306
1307

1308
1309
1310
1311
1312
1313
1314
1315







+
-
+







EXTERN char *		Tcl_AttemptDbCkrealloc(char *ptr, unsigned int size,
				const char *file, int line);
/* 432 */
EXTERN int		Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length);
/* 433 */
EXTERN Tcl_ThreadId	Tcl_GetChannelThread(Tcl_Channel channel);
/* 434 */
TCL_DEPRECATED("Use Tcl_UtfToUniCharDString")
EXTERN Tcl_UniChar *	Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
Tcl_UniChar *		Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
				int *lengthPtr);
/* 435 */
TCL_DEPRECATED("")
int			Tcl_GetMathFuncInfo(Tcl_Interp *interp,
				const char *name, int *numArgsPtr,
				Tcl_ValueType **argTypesPtr,
				Tcl_MathProc **procPtr,
2296
2297
2298
2299
2300
2301
2302
2303
2304


2305
2306
2307
2308
2309
2310
2311
2304
2305
2306
2307
2308
2309
2310


2311
2312
2313
2314
2315
2316
2317
2318
2319







-
-
+
+







    int (*tcl_UniCharIsAlnum) (int ch); /* 345 */
    int (*tcl_UniCharIsAlpha) (int ch); /* 346 */
    int (*tcl_UniCharIsDigit) (int ch); /* 347 */
    int (*tcl_UniCharIsLower) (int ch); /* 348 */
    int (*tcl_UniCharIsSpace) (int ch); /* 349 */
    int (*tcl_UniCharIsUpper) (int ch); /* 350 */
    int (*tcl_UniCharIsWordChar) (int ch); /* 351 */
    int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
    int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
    TCL_DEPRECATED_API("Use Tcl_GetCharLength") int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
    TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
    char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
    unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
    Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */
    TCL_DEPRECATED_API("Use Tcl_EvalTokensStandard") Tcl_Obj * (*tcl_EvalTokens) (Tcl_Interp *interp, Tcl_Token *tokenPtr, int count); /* 357 */
    void (*tcl_FreeParse) (Tcl_Parse *parsePtr); /* 358 */
    void (*tcl_LogCommandInfo) (Tcl_Interp *interp, const char *script, const char *command, int length); /* 359 */
    int (*tcl_ParseBraces) (Tcl_Interp *interp, const char *start, int numBytes, Tcl_Parse *parsePtr, int append, const char **termPtr); /* 360 */
2322
2323
2324
2325
2326
2327
2328
2329
2330


2331
2332
2333
2334
2335

2336
2337
2338
2339
2340
2341
2342
2330
2331
2332
2333
2334
2335
2336


2337
2338
2339
2340
2341
2342

2343
2344
2345
2346
2347
2348
2349
2350







-
-
+
+




-
+







    int (*tcl_StringCaseMatch) (const char *str, const char *pattern, int nocase); /* 371 */
    int (*tcl_UniCharIsControl) (int ch); /* 372 */
    int (*tcl_UniCharIsGraph) (int ch); /* 373 */
    int (*tcl_UniCharIsPrint) (int ch); /* 374 */
    int (*tcl_UniCharIsPunct) (int ch); /* 375 */
    int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */
    void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */
    Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
    void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
    TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */
    TCL_DEPRECATED_API("Use Tcl_UniCharToUtfDString") void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */
    int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */
    int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */
    TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */
    Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */
    void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
    TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */
    int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */
    void (*tcl_SetNotifier) (Tcl_NotifierProcs *notifierProcPtr); /* 386 */
    Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */
    int (*tcl_GetChannelNames) (Tcl_Interp *interp); /* 388 */
    int (*tcl_GetChannelNamesEx) (Tcl_Interp *interp, const char *pattern); /* 389 */
    int (*tcl_ProcObjCmd) (ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); /* 390 */
    void (*tcl_ConditionFinalize) (Tcl_Condition *condPtr); /* 391 */
2363
2364
2365
2366
2367
2368
2369
2370
2371


2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385

2386
2387
2388
2389
2390
2391
2392
2371
2372
2373
2374
2375
2376
2377


2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392

2393
2394
2395
2396
2397
2398
2399
2400







-
-
+
+













-
+







    int (*tcl_JoinThread) (Tcl_ThreadId threadId, int *result); /* 412 */
    int (*tcl_IsChannelShared) (Tcl_Channel channel); /* 413 */
    int (*tcl_IsChannelRegistered) (Tcl_Interp *interp, Tcl_Channel channel); /* 414 */
    void (*tcl_CutChannel) (Tcl_Channel channel); /* 415 */
    void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */
    void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */
    int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */
    int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */
    int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */
    TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */
    TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */
    Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */
    Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */
    void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */
    void (*tcl_InitObjHashTable) (Tcl_HashTable *tablePtr); /* 424 */
    ClientData (*tcl_CommandTraceInfo) (Tcl_Interp *interp, const char *varName, int flags, Tcl_CommandTraceProc *procPtr, ClientData prevClientData); /* 425 */
    int (*tcl_TraceCommand) (Tcl_Interp *interp, const char *varName, int flags, Tcl_CommandTraceProc *proc, ClientData clientData); /* 426 */
    void (*tcl_UntraceCommand) (Tcl_Interp *interp, const char *varName, int flags, Tcl_CommandTraceProc *proc, ClientData clientData); /* 427 */
    char * (*tcl_AttemptAlloc) (unsigned int size); /* 428 */
    char * (*tcl_AttemptDbCkalloc) (unsigned int size, const char *file, int line); /* 429 */
    char * (*tcl_AttemptRealloc) (char *ptr, unsigned int size); /* 430 */
    char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */
    int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */
    Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */
    Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
    TCL_DEPRECATED_API("Use Tcl_UtfToUniCharDString") Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */
    TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */
    TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */
    Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */
    int (*tcl_DetachChannel) (Tcl_Interp *interp, Tcl_Channel channel); /* 438 */
    int (*tcl_IsStandardChannel) (Tcl_Channel channel); /* 439 */
    int (*tcl_FSCopyFile) (Tcl_Obj *srcPathPtr, Tcl_Obj *destPathPtr); /* 440 */
    int (*tcl_FSCopyDirectory) (Tcl_Obj *srcPathPtr, Tcl_Obj *destPathPtr, Tcl_Obj **errorPtr); /* 441 */
4105
4106
4107
4108
4109
4110
4111
4112

4113
4114
4115
4116
4117
4118
4119
4113
4114
4115
4116
4117
4118
4119

4120
4121
4122
4123
4124
4125
4126
4127







-
+







#undef Tcl_GetUnicode
#define Tcl_GetUnicode(objPtr)	Tcl_GetUnicodeFromObj((objPtr), NULL)
#undef Tcl_BackgroundError
#define Tcl_BackgroundError(interp)	Tcl_BackgroundException((interp), TCL_ERROR)
#undef Tcl_StringMatch
#define Tcl_StringMatch(str, pattern) Tcl_StringCaseMatch((str), (pattern), 0)

#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
#   undef Tcl_UniCharToUtfDString
#   define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString
#   undef Tcl_UtfToUniCharDString
#   define Tcl_UtfToUniCharDString Tcl_UtfToChar16DString
#   undef Tcl_UtfToUniChar
#   define Tcl_UtfToUniChar Tcl_UtfToChar16
#endif

Changes to generic/tclDisassemble.c.

899
900
901
902
903
904
905
906

907
908
909
910
911
912
913
899
900
901
902
903
904
905

906
907
908
909
910
911
912
913







-
+







	    i += 2;
	    continue;
	case '\v':
	    Tcl_AppendToObj(appendObj, "\\v", -1);
	    i += 2;
	    continue;
	default:
#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
	    if (ch > 0xffff) {
		Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch);
		i += 10;
	    } else
#else
	    /* If len == 0, this means we have a char > 0xffff, resulting in
	     * TclUtfToUniChar producing a surrogate pair. We want to output

Changes to generic/tclEncoding.c.

2611
2612
2613
2614
2615
2616
2617
2618

2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633

2634
2635
2636
2637
2638
2639
2640
2611
2612
2613
2614
2615
2616
2617

2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632

2633
2634
2635
2636
2637
2638
2639
2640







-
+














-
+








	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (*chPtr >> 8);
	    } else {
		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    }
#else
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
#endif
	} else {
#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr >> 8);
		*dst++ = (*chPtr & 0xFF);
	    } else {
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
2693
2694
2695
2696
2697
2698
2699
2700

2701
2702
2703
2704
2705
2706
2707
2693
2694
2695
2696
2697
2698
2699

2700
2701
2702
2703
2704
2705
2706
2707







-
+







				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
    int result, numChars;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    int len;
#endif
    Tcl_UniChar ch = 0;

    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
2723
2724
2725
2726
2727
2728
2729
2730

2731
2732
2733
2734
2735
2736
2737
2723
2724
2725
2726
2727
2728
2729

2730
2731
2732
2733
2734
2735
2736
2737







-
+







	    result = TCL_CONVERT_MULTIBYTE;
	    break;
	}
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	src += (len = TclUtfToUniChar(src, &ch));
	if ((ch >= 0xD800) && (len < 3)) {
	    src += TclUtfToUniChar(src, &ch);
	    ch = 0xFFFD;
	}
#else
	src += TclUtfToUniChar(src, &ch);
2947
2948
2949
2950
2951
2952
2953
2954

2955
2956
2957
2958
2959
2960
2961
2947
2948
2949
2950
2951
2952
2953

2954
2955
2956
2957
2958
2959
2960
2961







-
+







	     */

	    result = TCL_CONVERT_MULTIBYTE;
	    break;
	}
	len = TclUtfToUniChar(src, &ch);

#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
	/*
	 * This prevents a crash condition. More evaluation is required for
	 * full support of int Tcl_UniChar. [Bug 1004065]
	 */

	if (ch & 0xffff0000) {
	    word = 0;
3158
3159
3160
3161
3162
3163
3164
3165

3166
3167
3168
3169
3170
3171
3172
3173

3174
3175
3176
3177
3178
3179
3180
3158
3159
3160
3161
3162
3163
3164

3165
3166
3167
3168
3169
3170
3171
3172

3173
3174
3175
3176
3177
3178
3179
3180







-
+







-
+







	len = TclUtfToUniChar(src, &ch);

	/*
	 * Check for illegal characters.
	 */

	if (ch > 0xff
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
		|| ((ch >= 0xD800) && (len < 3))
#endif
		) {
	    if (flags & TCL_ENCODING_STOPONERROR) {
		result = TCL_CONVERT_UNKNOWN;
		break;
	    }
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    if ((ch >= 0xD800) && (len < 3)) len = 4;
#endif
	    /*
	     * Plunge on, using '?' as a fallback character.
	     */

	    ch = (Tcl_UniChar) '?';

Changes to generic/tclIO.c.

4715
4716
4717
4718
4719
4720
4721
4722

4723
4724
4725
4726
4727
4728
4729
4715
4716
4717
4718
4719
4720
4721

4722
4723
4724
4725
4726
4727
4728
4729







-
+







		    int rawRead;
		    char tmp[TCL_UTF_MAX];

		    bufPtr = gs.bufPtr;
		    Tcl_ExternalToUtf(NULL, gs.encoding, RemovePoint(bufPtr),
			    gs.rawRead, statePtr->inputEncodingFlags
				| TCL_ENCODING_NO_TERMINATE, &gs.state, tmp,
			    TCL_UTF_MAX, &rawRead, NULL, NULL);
			    sizeof(tmp), &rawRead, NULL, NULL);
		    bufPtr->nextRemoved += rawRead;
		    gs.rawRead -= rawRead;
		    gs.bytesWrote--;
		    gs.charsWrote--;
		    memmove(dst, dst + 1, (size_t) (dstEnd - dst));
		    dstEnd--;
		}
6278
6279
6280
6281
6282
6283
6284
6285

6286
6287
6288
6289
6290
6291
6292
6278
6279
6280
6281
6282
6283
6284

6285
6286
6287
6288
6289
6290
6291
6292







-
+








		assert(bufPtr->nextPtr == NULL
			|| BytesLeft(bufPtr->nextPtr) == 0 || 0 ==
			(statePtr->inputEncodingFlags & TCL_ENCODING_END));

		Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		(statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE),
		&statePtr->inputEncodingState, buffer, TCL_UTF_MAX + 1,
		&statePtr->inputEncodingState, buffer, sizeof(buffer),
		&read, &decoded, &count);

		if (count == 2) {
		    if (buffer[1] == '\n') {
			/* \r\n translate to \n */
			dst[0] = '\n';
			bufPtr->nextRemoved += read;

Changes to generic/tclInt.h.

4611
4612
4613
4614
4615
4616
4617
4618

4619
4620
4621
4622
4623
4624
4625
4611
4612
4613
4614
4615
4616
4617

4618
4619
4620
4621
4622
4623
4624
4625







-
+







 * string handling. The macro's expression result is 1 for the 1-byte case or
 * the result of Tcl_UtfToUniChar. The ANSI C "prototype" for this macro is:
 *
 * MODULE_SCOPE int	TclUtfToUniChar(const char *string, Tcl_UniChar *ch);
 *----------------------------------------------------------------
 */

#if TCL_UTF_MAX > 4
#if TCL_UTF_MAX > 3
#define TclUtfToUniChar(str, chPtr) \
	((((unsigned char) *(str)) < 0x80) ?		\
	    ((*(chPtr) = (unsigned char) *(str)), 1)	\
	    : Tcl_UtfToUniChar(str, chPtr))
#else
#define TclUtfToUniChar(str, chPtr) \
	((((unsigned char) *(str)) < 0x80) ?		\

Changes to generic/tclParse.c.

932
933
934
935
936
937
938
939

940
941
942
943
944
945
946
932
933
934
935
936
937
938

939
940
941
942
943
944
945
946







-
+







	 * special, we shouldn't break up a correct utf-8 character. [Bug
	 * #217987] test subst-3.2
	 */

	if (Tcl_UtfCharComplete(p, numBytes - 1)) {
	    count = TclUtfToUniChar(p, &unichar) + 1;	/* +1 for '\' */
	} else {
	    char utfBytes[TCL_UTF_MAX];
	    char utfBytes[4];

	    memcpy(utfBytes, p, numBytes - 1);
	    utfBytes[numBytes - 1] = '\0';
	    count = TclUtfToUniChar(utfBytes, &unichar) + 1;
	}
	result = unichar;
	break;

Changes to generic/tclScan.c.

261
262
263
264
265
266
267
268

269
270
271
272
273
274
275
261
262
263
264
265
266
267

268
269
270
271
272
273
274
275







-
+







    Tcl_UniChar ch = 0;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
				 * we're inside there! */
    char buf[TCL_UTF_MAX + 1] = "";
    char buf[5] = "";

    /*
     * Initialize an array that records the number of times a variable is
     * assigned to by the format string. We use this to detect if a variable
     * is multiply assigned or left unassigned.
     */

873
874
875
876
877
878
879
880

881
882
883
884
885
886
887
873
874
875
876
877
878
879

880
881
882
883
884
885
886
887







-
+







	case 'c':
	    /*
	     * Scan a single Unicode character.
	     */

	    offset = TclUtfToUniChar(string, &sch);
	    i = (int)sch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    if ((sch >= 0xD800) && (offset < 3)) {
		offset += TclUtfToUniChar(string+offset, &sch);
		i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
	    }
#endif
	    string += offset;
	    if (!(flags & SCAN_SUPPRESS)) {

Changes to generic/tclStringObj.c.

564
565
566
567
568
569
570
571

572
573
574
575
576
577
578
564
565
566
567
568
569
570

571
572
573
574
575
576
577
578







-
+







	stringPtr = GET_STRING(objPtr);
    }

    if (index >= stringPtr->numChars) {
	return -1;
    }
    ch = stringPtr->unicode[index];
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    /* See: bug [11ae2be95dac9417] */
    if ((ch & 0xF800) == 0xD800) {
	if (ch & 0x400) {
	    if ((index > 0)
		    && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) {
		ch = -1; /* low surrogate preceded by high surrogate */
	    }
748
749
750
751
752
753
754
755

756
757
758
759
760
761
762
748
749
750
751
752
753
754

755
756
757
758
759
760
761
762







-
+







    }
    if (last > stringPtr->numChars) {
	last = stringPtr->numChars;
    }
    if (last < first) {
	return Tcl_NewObj();
    }
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    /* See: bug [11ae2be95dac9417] */
    if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00)
	    && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) {
	++first;
    }
    if ((last + 1 < stringPtr->numChars)
	    && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00)
2609
2610
2611
2612
2613
2614
2615
2616

2617
2618
2619
2620
2621
2622
2623
2609
2610
2611
2612
2613
2614
2615

2616
2617
2618
2619
2620
2621
2622
2623







-
+







		 */

		q = Tcl_UtfPrev(end, bytes);
		if (!Tcl_UtfCharComplete(q, (int)(end - q))) {
		    end = q;
		}

		q = bytes + TCL_UTF_MAX;
		q = bytes + 4;
		while ((bytes < end) && (bytes < q)
			&& ((*bytes & 0xC0) == 0x80)) {
		    bytes++;
		}

		Tcl_ListObjAppendElement(NULL, list,
			Tcl_NewStringObj(bytes , (int)(end - bytes)));

Changes to generic/tclStubInit.c.

36
37
38
39
40
41
42








43
44
45
46
47
48
49
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57







+
+
+
+
+
+
+
+







#undef Tcl_NewIntObj
#undef Tcl_NewListObj
#undef Tcl_NewLongObj
#undef Tcl_DbNewLongObj
#undef Tcl_NewObj
#undef Tcl_NewStringObj
#undef Tcl_GetUnicode
#undef Tcl_GetUnicodeFromObj
#undef Tcl_AppendUnicodeToObj
#undef Tcl_NewUnicodeObj
#undef Tcl_SetUnicodeObj
#undef Tcl_UniCharNcasecmp
#undef Tcl_UniCharCaseMatch
#undef Tcl_UniCharLen
#undef Tcl_UniCharNcmp
#undef Tcl_DumpActiveMemory
#undef Tcl_ValidateAllMemory
#undef Tcl_FindHashEntry
#undef Tcl_CreateHashEntry
#undef Tcl_Panic
#undef Tcl_FindExecutable
#undef Tcl_SetExitProc
59
60
61
62
63
64
65















66
67
68
69
70
71
72
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







#undef TclWinNToHS
#undef TclStaticPackage
#undef Tcl_BackgroundError
#define TclStaticPackage Tcl_StaticPackage
#undef Tcl_UniCharToUtfDString
#undef Tcl_UtfToUniCharDString
#undef Tcl_UtfToUniChar

#if TCL_UTF_MAX > 3
/*
 * uniCodePanic --
 *
 *	Replacement target for the Tcl_UniChar-based entry points that were
 *	#undef'ed above. It takes no arguments and reports, via Tcl_Panic,
 *	that the requested function is not available in a build with this
 *	TCL_UTF_MAX value.
 */
static void uniCodePanic(void) {
    Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX);
}
/*
 * Each name below now expands to uniCodePanic cast to a function-pointer
 * type, so every later use of the name in this file refers to the panic
 * routine instead of the real function.
 * NOTE(review): the parameter lists in some casts (e.g. Tcl_GetUnicodeFromObj,
 * Tcl_NewUnicodeObj) spell "int" and "Tcl_UniChar" interchangeably; this
 * presumably relies on Tcl_UniChar being int in this configuration - confirm.
 */
#   define Tcl_GetUnicode (int *(*)(Tcl_Obj *)) uniCodePanic
#   define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, Tcl_UniChar *)) uniCodePanic
#   define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, Tcl_UniChar)) uniCodePanic
#   define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int)) uniCodePanic
#   define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int)) uniCodePanic
#   define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long)) uniCodePanic
#   define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int)) uniCodePanic
#   define Tcl_UniCharLen (int(*)(const Tcl_UniChar *)) uniCodePanic
#   define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long)) uniCodePanic
#endif

#define TclBN_mp_add mp_add
#define TclBN_mp_and mp_and
#define TclBN_mp_clamp mp_clamp
#define TclBN_mp_clear mp_clear
#define TclBN_mp_clear_multi mp_clear_multi
#define TclBN_mp_cmp mp_cmp
476
477
478
479
480
481
482

483
484
485
486





487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523




524
525
526
527
528
529
530







+




+
+
+
+
+








-
-
-
-







		    "integer value too large to represent as non-long integer", -1));
	    result = TCL_ERROR;
	}
    }
    return result;
}
#define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj
#if TCL_UTF_MAX < 4
/*
 * Adapters for the Tcl_UniChar comparison functions. Each one accepts the
 * character count as an unsigned int and forwards it, widened to unsigned
 * long, to the real function. The macro that follows each adapter then
 * redirects the public name to the adapter, cast to the unsigned-long
 * signature.
 *
 * Every redirection goes through an intermediate (void *) cast, matching the
 * Tcl_UtfNcmp/Tcl_UtfNcasecmp redirections in this file (presumably to keep
 * compilers from warning about the incompatible function-pointer cast).
 */
static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
   return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcmp
static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
   return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcasecmp
#endif
static int utfNcmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcmp (int(*)(const char*,const char*,unsigned long))(void *)utfNcmp
static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))(void *)utfNcasecmp
static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
   return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))(void *)uniCharNcasecmp

#endif /* TCL_WIDE_INT_IS_LONG */

#endif /* __CYGWIN__ */

#if defined(TCL_NO_DEPRECATED)
#   define Tcl_SeekOld 0
587
588
589
590
591
592
593







594
595
596
597
598
599
600
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632







+
+
+
+
+
+
+







#   define TclpGmtime 0
#   define TclpLocaltime_unix 0
#   define TclpGmtime_unix 0
#   define Tcl_SetExitProc 0
#   define Tcl_SetPanicProc 0
#   define Tcl_FindExecutable 0
#   define Tcl_GetUnicode 0
#if TCL_UTF_MAX < 4
#   define Tcl_AppendUnicodeToObj 0
#   define Tcl_UniCharCaseMatch 0
#   define Tcl_UniCharLen 0
#   define Tcl_UniCharNcasecmp 0
#   define Tcl_UniCharNcmp 0
#endif
#   undef Tcl_StringMatch
#   define Tcl_StringMatch 0
#   define TclBN_reverse 0
#   undef TclBN_s_mp_mul_digs_fast
#   define TclBN_s_mp_mul_digs_fast 0
#   undef TclBN_s_mp_sqr_fast
#   define TclBN_s_mp_sqr_fast 0

Changes to generic/tclUtf.c.

783
784
785
786
787
788
789
790

791
792
793
794
795
796
797
783
784
785
786
787
788
789

790
791
792
793
794
795
796
797







-
+







{
    int len, fullchar;
    Tcl_UniChar find = 0;

    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
	}
#endif
	if (fullchar == ch) {
	    return src;
831
832
833
834
835
836
837
838

839
840
841
842
843
844
845
831
832
833
834
835
836
837

838
839
840
841
842
843
844
845







-
+







    Tcl_UniChar find = 0;
    const char *last;

    last = NULL;
    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
	}
#endif
	if (fullchar == ch) {
	    last = src;
874
875
876
877
878
879
880
881

882
883
884
885
886
887
888
874
875
876
877
878
879
880

881
882
883
884
885
886
887
888







-
+







const char *
Tcl_UtfNext(
    const char *src)		/* The current location in the string. */
{
    Tcl_UniChar ch = 0;
    int len = TclUtfToUniChar(src, &ch);

#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    if ((ch >= 0xD800) && (len < 3)) {
	len += TclUtfToUniChar(src + len, &ch);
    }
#endif
    return src + len;
}

956
957
958
959
960
961
962
963

964
965
966
967
968

969
970
971
972
973
974
975

976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991

992
993
994
995
996
997
998
956
957
958
959
960
961
962

963
964
965
966
967

968
969
970
971
972
973
974

975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990

991
992
993
994
995
996
997
998







-
+




-
+






-
+















-
+







int
Tcl_UniCharAtIndex(
    const char *src,	/* The UTF-8 string to dereference. */
    int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;
    int fullchar = 0;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	int len = 0;
#endif

    while (index-- >= 0) {
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	src += (len = TclUtfToUniChar(src, &ch));
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    if ((ch >= 0xD800) && (len < 3)) {
	/* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
	(void)TclUtfToUniChar(src, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfAtIndex --
 *
 *	Returns a pointer to the specified character (not byte) position in
 *	the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as
 *	the UTF-8 string. If TCL_UTF_MAX <= 3, characters > U+FFFF count as
 *	2 positions, but then the pointer should never be placed between
 *	the two positions.
 *
 * Results:
 *	As above.
 *
 * Side effects:
1009
1010
1011
1012
1013
1014
1015
1016

1017
1018
1019
1020
1021
1022
1023
1009
1010
1011
1012
1013
1014
1015

1016
1017
1018
1019
1020
1021
1022
1023







-
+







    Tcl_UniChar ch = 0;
    int len = 0;

    while (index-- > 0) {
	len = TclUtfToUniChar(src, &ch);
	src += len;
    }
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
    if ((ch >= 0xD800) && (len < 3)) {
	/* Index points at character following high Surrogate */
	src += TclUtfToUniChar(src, &ch);
    }
#endif
    return src;
}
1106
1107
1108
1109
1110
1111
1112
1113

1114
1115
1116
1117
1118
1119
1120
1106
1107
1108
1109
1110
1111
1112

1113
1114
1115
1116
1117
1118
1119
1120







-
+







     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	len = TclUtfToUniChar(src, &ch);
	upChar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((ch >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &ch);
	    /* Combine surrogates */
	    upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	upChar = Tcl_UniCharToUpper(upChar);
1168
1169
1170
1171
1172
1173
1174
1175

1176
1177
1178
1179
1180
1181
1182
1168
1169
1170
1171
1172
1173
1174

1175
1176
1177
1178
1179
1180
1181
1182







-
+







     * Iterate over the string until we hit the terminating null.
     */

    src = dst = str;
    while (*src) {
	len = TclUtfToUniChar(src, &ch);
	lowChar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((ch >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &ch);
	    /* Combine surrogates */
	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	lowChar = Tcl_UniCharToLower(lowChar);
1233
1234
1235
1236
1237
1238
1239
1240

1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260

1261
1262
1263
1264
1265
1266
1267
1233
1234
1235
1236
1237
1238
1239

1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259

1260
1261
1262
1263
1264
1265
1266
1267







-
+



















-
+







     */

    src = dst = str;

    if (*src) {
	len = TclUtfToUniChar(src, &ch);
	titleChar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((ch >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &ch);
	    /* Combine surrogates */
	    titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	titleChar = Tcl_UniCharToTitle(titleChar);

	if ((len < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) {
	    memmove(dst, src, len);
	    dst += len;
	} else {
	    dst += Tcl_UniCharToUtf(titleChar, dst);
	}
	src += len;
    }
    while (*src) {
	len = TclUtfToUniChar(src, &ch);
	lowChar = ch;
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	if ((ch >= 0xD800) && (len < 3)) {
	    len += TclUtfToUniChar(src + len, &ch);
	    /* Combine surrogates */
	    lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	/* Special exception for Georgian Asomtavruli chars, no titlecase. */
1365
1366
1367
1368
1369
1370
1371
1372

1373
1374
1375
1376
1377
1378
1379
1365
1366
1367
1368
1369
1370
1371

1372
1373
1374
1375
1376
1377
1378
1379







-
+







	 * only when both strings are of at least n chars long (no need for \0
	 * check)
	 */

	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    /* Surrogates always report higher than non-surrogates */
	    if (((ch1 & 0xFC00) == 0xD800)) {
	    if ((ch2 & 0xFC00) != 0xD800) {
		return ch1;
	    }
	    } else if ((ch2 & 0xFC00) == 0xD800) {
		return -ch2;
1416
1417
1418
1419
1420
1421
1422
1423

1424
1425
1426
1427
1428
1429
1430
1416
1417
1418
1419
1420
1421
1422

1423
1424
1425
1426
1427
1428
1429
1430







-
+







	 * n must be interpreted as chars, not bytes.
	 * This should be called only when both strings are of
	 * at least n chars long (no need for \0 check)
	 */
	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    /* Surrogates always report higher than non-surrogates */
	    if (((ch1 & 0xFC00) == 0xD800)) {
	    if ((ch2 & 0xFC00) != 0xD800) {
		return ch1;
	    }
	    } else if ((ch2 & 0xFC00) == 0xD800) {
		return -ch2;
1465
1466
1467
1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
1478
1479
1465
1466
1467
1468
1469
1470
1471

1472
1473
1474
1475
1476
1477
1478
1479







-
+







{
    Tcl_UniChar ch1 = 0, ch2 = 0;

    while (*cs && *ct) {
	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    /* Surrogates always report higher than non-surrogates */
	    if (((ch1 & 0xFC00) == 0xD800)) {
	    if ((ch2 & 0xFC00) != 0xD800) {
		return ch1;
	    }
	    } else if ((ch2 & 0xFC00) == 0xD800) {
		return -ch2;
1511
1512
1513
1514
1515
1516
1517
1518

1519
1520
1521
1522
1523
1524
1525
1511
1512
1513
1514
1515
1516
1517

1518
1519
1520
1521
1522
1523
1524
1525







-
+







{
    Tcl_UniChar ch1 = 0, ch2 = 0;

    while (*cs && *ct) {
	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
#if TCL_UTF_MAX <= 4
#if TCL_UTF_MAX <= 3
	    /* Surrogates always report higher than non-surrogates */
	    if (((ch1 & 0xFC00) == 0xD800)) {
	    if ((ch2 & 0xFC00) != 0xD800) {
		return ch1;
	    }
	    } else if ((ch2 & 0xFC00) == 0xD800) {
		return -ch2;

Changes to tests/stringObj.test.

19
20
21
22
23
24
25


26
27
28
29
30
31
32
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34







+
+








::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]

testConstraint testobj [llength [info commands testobj]]
testConstraint testbytestring [llength [info commands testbytestring]]
testConstraint testdstring [llength [info commands testdstring]]
testConstraint tip389 [expr {[string length \U010000] == 2}]
testConstraint nodep [info exists tcl_precision]

test stringObj-1.1 {string type registration} testobj {
    set t [testobj types]
    set first [string first "string" $t]
    set result [expr {$first != -1}]
} {1}

460
461
462
463
464
465
466
467

468
469
470
471

472
473
474
475

476
477
478
479

480
481
482
483
484
485
486
462
463
464
465
466
467
468

469
470
471
472

473
474
475
476

477
478
479
480

481
482
483
484
485
486
487
488







-
+



-
+



-
+



-
+







    teststringobj set 1 foo
    teststringobj appendself 1 2
} fooo
test stringObj-15.4 {Tcl_Append*ToObj: self appends} testobj {
    teststringobj set 1 foo
    teststringobj appendself 1 3
} foo
test stringObj-15.5 {Tcl_Append*ToObj: self appends} testobj {
test stringObj-15.5 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} {
    teststringobj set 1 foo
    teststringobj appendself2 1 0
} foofoo
test stringObj-15.6 {Tcl_Append*ToObj: self appends} testobj {
test stringObj-15.6 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} {
    teststringobj set 1 foo
    teststringobj appendself2 1 1
} foooo
test stringObj-15.7 {Tcl_Append*ToObj: self appends} testobj {
test stringObj-15.7 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} {
    teststringobj set 1 foo
    teststringobj appendself2 1 2
} fooo
test stringObj-15.8 {Tcl_Append*ToObj: self appends} testobj {
test stringObj-15.8 {Tcl_Append*ToObj: self appends} {testobj tip389 nodep} {
    teststringobj set 1 foo
    teststringobj appendself2 1 3
} foo

if {[testConstraint testobj]} {
    testobj freeallvars
}

Changes to win/rules.vc.

682
683
684
685
686
687
688
689

690
691
692
693
694
695
696
682
683
684
685
686
687
688

689
690
691
692
693
694
695
696







-
+







# UNCHECKED - 1 -> when doing a debug build with symbols, use the release
#           C runtime, 0 -> use the debug C runtime.
# USE_STUBS - 1 -> compile to use stubs interfaces, 0 -> direct linking
# CONFIG_CHECK - 1 -> check current build configuration against Tcl
#           configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
#           (CRT library should support this)
# TCL_UTF_MAX=6 - forces a build using 32-bit Tcl_UniChar instead of 16-bit.
# TCL_UTF_MAX=4 - forces a build using 32-bit Tcl_UniChar instead of 16-bit.
# Further, LINKERFLAGS are modified based on above.

# Default values for all the above
STATIC_BUILD	= 0
TCL_THREADS	= 1
DEBUG		= 0
SYMBOLS		= 0
758
759
760
761
762
763
764
765

766
767
768
769
770
771
772
758
759
760
761
762
763
764

765
766
767
768
769
770
771
772







-
+







!if [nmakehlp -f $(OPTS) "time64bit"]
!message *** Force 64-bit time_t
_USE_64BIT_TIME_T = 1
!endif

!if [nmakehlp -f $(OPTS) "utfmax"]
!message *** Force 32-bit Tcl_UniChar
TCL_UTF_MAX = 6
TCL_UTF_MAX = 4
!endif

# Yes, it's weird that the "symbols" option controls DEBUG and
# the "pdbs" option controls SYMBOLS. That's historical.
!if [nmakehlp -f $(OPTS) "symbols"]
!message *** Doing symbols
DEBUG		= 1
1360
1361
1362
1363
1364
1365
1366
1367

1368
1369
1370
1371
1372
1373
1374
1360
1361
1362
1363
1364
1365
1366

1367
1368
1369
1370
1371
1372
1373
1374







-
+







OPTDEFINES	= $(OPTDEFINES) /DNO_STRTOI64=1
!endif

!if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES	= $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
!if "$(TCL_UTF_MAX)" == "6"
OPTDEFINES	= $(OPTDEFINES) /DTCL_UTF_MAX=6
OPTDEFINES	= $(OPTDEFINES) /DTCL_UTF_MAX=4
!endif

# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS  = /D_ATL_XP_TARGETING

# Like the TEA system only set this non empty for non-Tk extensions
# Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME