Tcl Source Code

Check-in [b3d886c84f]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Finish the complete fix; all corner cases are correct now. Also fixes the spurious UTF-8 test-case failure (as seen on Travis).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-bd94500678
Files: files | file ages | folders
SHA3-256: b3d886c84f3ff860439df881ce2a918ea3f6aa21012c7947ddc445d1440eb1b0
User & Date: jan.nijtmans 2019-02-25 21:10:33
Context
2019-02-27
21:29
Fix [bd94500678e837d7]: SEGFAULT by conversion of unicode (out of BMP) to byte-array check-in: efe8f3e6b0 user: jan.nijtmans tags: core-8-branch
2019-02-25
21:10
Finish complete fix, all corner-cases correct now. Also spurious UTF-8 testcase failure (as seen on ... Closed-Leaf check-in: b3d886c84f user: jan.nijtmans tags: bug-bd94500678
17:06
merge 8.7 check-in: 6bda8e2cf5 user: dgp tags: bug-bd94500678
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclBinary.c.

1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
....
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1];

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }
................................................................................
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1];

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }






|







 







|







1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
....
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }
................................................................................
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }

Changes to generic/tclCmdMZ.c.

1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
	 */

	if (TclIsPureByteArray(objv[1])) {
	    unsigned char uch = (unsigned char) ch;

	    Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
	} else {
	    char buf[4];

	    length = Tcl_UniCharToUtf(ch, buf);
	    if ((ch >= 0xD800) && (length < 3)) {
		length = Tcl_UniCharToUtf(-1, buf + length);
	    }
	    Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
	}
    }
    return TCL_OK;
}
 






|



|







1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
	 */

	if (TclIsPureByteArray(objv[1])) {
	    unsigned char uch = (unsigned char) ch;

	    Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
	} else {
	    char buf[TCL_UTF_MAX] = "";

	    length = Tcl_UniCharToUtf(ch, buf);
	    if ((ch >= 0xD800) && (length < 3)) {
		length += Tcl_UniCharToUtf(-1, buf + length);
	    }
	    Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
	}
    }
    return TCL_OK;
}
 

Changes to generic/tclCompCmdsSZ.c.

1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
	PUSH("");
	count++;
    }

    for (endTokenPtr = tokenPtr + parse.numTokens;
	    tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) {
	int length, literal, catchRange, breakJump;
	char buf[TCL_UTF_MAX];
	JumpFixup startFixup, okFixup, returnFixup, breakFixup;
	JumpFixup continueFixup, otherFixup, endFixup;

	switch (tokenPtr->type) {
	case TCL_TOKEN_TEXT:
	    literal = TclRegisterLiteral(envPtr,
		    tokenPtr->start, tokenPtr->size, 0);






|







1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
	PUSH("");
	count++;
    }

    for (endTokenPtr = tokenPtr + parse.numTokens;
	    tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) {
	int length, literal, catchRange, breakJump;
	char buf[TCL_UTF_MAX] = "";
	JumpFixup startFixup, okFixup, returnFixup, breakFixup;
	JumpFixup continueFixup, otherFixup, endFixup;

	switch (tokenPtr->type) {
	case TCL_TOKEN_TEXT:
	    literal = TclRegisterLiteral(envPtr,
		    tokenPtr->start, tokenPtr->size, 0);

Changes to generic/tclCompile.c.

1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
....
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
	    if (tempPtr != NULL) {
		Tcl_AppendToObj(tempPtr, tokenPtr->start, tokenPtr->size);
	    }
	    break;

	case TCL_TOKEN_BS:
	    if (tempPtr != NULL) {
		char utfBuf[TCL_UTF_MAX];
		int length = TclParseBackslash(tokenPtr->start,
			tokenPtr->size, NULL, utfBuf);

		Tcl_AppendToObj(tempPtr, utfBuf, length);
	    }
	    break;

................................................................................
				 * compile. */
    int count,			/* Number of tokens to consider at tokenPtr.
				 * Must be at least 1. */
    CompileEnv *envPtr)		/* Holds the resulting instructions. */
{
    Tcl_DString textBuffer;	/* Holds concatenated chars from adjacent
				 * TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */
    char buffer[TCL_UTF_MAX];
    int i, numObjsToConcat, length, adjust;
    unsigned char *entryCodeNext = envPtr->codeNext;
#define NUM_STATIC_POS 20
    int isLiteral, maxNumCL, numCL;
    int *clPosition = NULL;
    int depth = TclGetStackDepth(envPtr);







|







 







|







1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
....
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
	    if (tempPtr != NULL) {
		Tcl_AppendToObj(tempPtr, tokenPtr->start, tokenPtr->size);
	    }
	    break;

	case TCL_TOKEN_BS:
	    if (tempPtr != NULL) {
		char utfBuf[TCL_UTF_MAX] = "";
		int length = TclParseBackslash(tokenPtr->start,
			tokenPtr->size, NULL, utfBuf);

		Tcl_AppendToObj(tempPtr, utfBuf, length);
	    }
	    break;

................................................................................
				 * compile. */
    int count,			/* Number of tokens to consider at tokenPtr.
				 * Must be at least 1. */
    CompileEnv *envPtr)		/* Holds the resulting instructions. */
{
    Tcl_DString textBuffer;	/* Holds concatenated chars from adjacent
				 * TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */
    char buffer[TCL_UTF_MAX] = "";
    int i, numObjsToConcat, length, adjust;
    unsigned char *entryCodeNext = envPtr->codeNext;
#define NUM_STATIC_POS 20
    int isLiteral, maxNumCL, numCL;
    int *clPosition = NULL;
    int depth = TclGetStackDepth(envPtr);

Changes to generic/tclExecute.c.

5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
	} else if (TclIsPureByteArray(valuePtr)) {
	    objResultPtr = Tcl_NewByteArrayObj(
		    Tcl_GetByteArrayFromObj(valuePtr, NULL)+index, 1);
	} else if (valuePtr->bytes && length == valuePtr->length) {
	    objResultPtr = Tcl_NewStringObj((const char *)
		    valuePtr->bytes+index, 1);
	} else {
	    char buf[4];
	    int ch = Tcl_GetUniChar(valuePtr, index);

	    /*
	     * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1)
	     * but creating the object as a string seems to be faster in
	     * practical use.
	     */
	    if (ch == -1) {
		objResultPtr = Tcl_NewObj();
	    } else {
		length = Tcl_UniCharToUtf(ch, buf);
		if ((ch >= 0xD800) && (length < 3)) {
		    length = Tcl_UniCharToUtf(-1, buf + length);
		}
		objResultPtr = Tcl_NewStringObj(buf, length);
	    }
	}

	TRACE_APPEND(("\"%s\"\n", O2S(objResultPtr)));
	NEXT_INST_F(1, 2, 1);






|












|







5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
	} else if (TclIsPureByteArray(valuePtr)) {
	    objResultPtr = Tcl_NewByteArrayObj(
		    Tcl_GetByteArrayFromObj(valuePtr, NULL)+index, 1);
	} else if (valuePtr->bytes && length == valuePtr->length) {
	    objResultPtr = Tcl_NewStringObj((const char *)
		    valuePtr->bytes+index, 1);
	} else {
	    char buf[TCL_UTF_MAX] = "";
	    int ch = Tcl_GetUniChar(valuePtr, index);

	    /*
	     * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1)
	     * but creating the object as a string seems to be faster in
	     * practical use.
	     */
	    if (ch == -1) {
		objResultPtr = Tcl_NewObj();
	    } else {
		length = Tcl_UniCharToUtf(ch, buf);
		if ((ch >= 0xD800) && (length < 3)) {
		    length += Tcl_UniCharToUtf(-1, buf + length);
		}
		objResultPtr = Tcl_NewStringObj(buf, length);
	    }
	}

	TRACE_APPEND(("\"%s\"\n", O2S(objResultPtr)));
	NEXT_INST_F(1, 2, 1);

Changes to generic/tclParse.c.

787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
....
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
				 * written. At most TCL_UTF_MAX bytes will be
				 * written there. */
{
    register const char *p = src+1;
    Tcl_UniChar unichar = 0;
    int result;
    int count;
    char buf[TCL_UTF_MAX];

    if (numBytes == 0) {
	if (readPtr != NULL) {
	    *readPtr = 0;
	}
	return 0;
    }
................................................................................

    adjust = 0;
    result = NULL;
    for (; count>0 && code==TCL_OK ; count--, tokenPtr++) {
	Tcl_Obj *appendObj = NULL;
	const char *append = NULL;
	int appendByteLength = 0;
	char utfCharBytes[TCL_UTF_MAX];

	switch (tokenPtr->type) {
	case TCL_TOKEN_TEXT:
	    append = tokenPtr->start;
	    appendByteLength = tokenPtr->size;
	    break;







|







 







|







787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
....
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
				 * written. At most TCL_UTF_MAX bytes will be
				 * written there. */
{
    register const char *p = src+1;
    Tcl_UniChar unichar = 0;
    int result;
    int count;
    char buf[TCL_UTF_MAX] = "";

    if (numBytes == 0) {
	if (readPtr != NULL) {
	    *readPtr = 0;
	}
	return 0;
    }
................................................................................

    adjust = 0;
    result = NULL;
    for (; count>0 && code==TCL_OK ; count--, tokenPtr++) {
	Tcl_Obj *appendObj = NULL;
	const char *append = NULL;
	int appendByteLength = 0;
	char utfCharBytes[TCL_UTF_MAX] = "";

	switch (tokenPtr->type) {
	case TCL_TOKEN_TEXT:
	    append = tokenPtr->start;
	    appendByteLength = tokenPtr->size;
	    break;

Changes to generic/tclScan.c.

257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags;
    char *end;
    Tcl_UniChar ch = 0;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[TCL_UTF_MAX+1];
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
				 * we're inside there! */

    /*
     * Initialize an array that records the number of times a variable is






|







257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags;
    char *end;
    Tcl_UniChar ch = 0;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[TCL_UTF_MAX+1] = "";
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
				 * we're inside there! */

    /*
     * Initialize an array that records the number of times a variable is

Changes to generic/tclUtf.c.

141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181



182
183
184
185
186
187
188
...
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
...
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
...
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
...
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
	    buf[0] = (char) ((ch >> 6) | 0xC0);
	    return 2;
	}
	if (ch <= 0xFFFF) {
	    if ((ch & 0xF800) == 0xD800) {
		if (ch & 0x0400) {
		    /* Low surrogate */
		    if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
			    && ((buf[2] & 0xCF) == 0)) {
			/* Previous Tcl_UniChar was a high surrogate, so combine */
			buf[3] = (char) ((ch & 0x3F) | 0x80);
			buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
			return 4;
		    }
		    /* Previous Tcl_UniChar was not a high surrogate, so just output */
		} else {
		    /* High surrogate */
		    ch += 0x40;
		    /* Fill buffer with specific 3-byte (invalid) byte combination,
		       so following low surrogate can recognize it and combine */
		    buf[2] = (char) ((ch << 4) & 0x30);
		    buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
		    buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
		    return 0;
		}
	    }
	    goto three;
	}
	if (ch <= 0x10FFFF) {
	    buf[3] = (char) ((ch | 0x80) & 0xBF);
	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
	    buf[0] = (char) ((ch >> 18) | 0xF0);
	    return 4;
	}
    } else if (ch == -1) {
	if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
		&& ((buf[2] & 0xCF) == 0)) {
	    ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2)
		    + ((buf[2] & 0x30) >> 4);
	    goto three;



	}
    }

    ch = 0xFFFD;
three:
    buf[2] = (char) ((ch | 0x80) & 0xBF);
    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
................................................................................
    register const char *src,	/* The UTF-8 string. */
    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
				 * the UTF-8 string. */
{
    Tcl_UniChar byte;

    /*
     * Unroll 1 to 3 (or 4) byte UTF-8 sequences.
     */

    byte = *((unsigned char *) src);
    if (byte < 0xC0) {
	/*
	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
	 * Treats naked trail bytes 0x80 to 0x9F as valid characters from
................................................................................
    }
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX <= 4
	    byte = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
		    | ((src[2] & 0x3F) >> 4)) - 0x40;
	    if (byte >= 0x400) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce high surrogate, advance source pointer */
		*chPtr = 0xD800 + byte;
		return 1;
	    }
#else
	    *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
	    if ((*chPtr - 0x10000) <= 0xFFFFF) {
		return 4;
................................................................................
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX <= 4
    if ((ch >= 0xD800) && (len < 3)) {
	/* If last Tcl_UniChar was an high surrogate, combine with low surrogate */
	(void)TclUtfToUniChar(src + len, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}
 
/*
................................................................................

    while (index-- > 0) {
	len = TclUtfToUniChar(src, &ch);
	src += len;
    }
#if TCL_UTF_MAX <= 4
    if ((ch >= 0xD800) && (len < 3)) {
	/* Index points at character following High Surrogate */
	src += TclUtfToUniChar(src, &ch);
    }
#endif
    return src;
}
 
/*






|
<

|
|
|










|












|
|
|
|
|
>
>
>







 







|







 







|

|



|







 







|
|







 







|







141
142
143
144
145
146
147
148

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
...
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
...
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
...
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
...
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
	    buf[0] = (char) ((ch >> 6) | 0xC0);
	    return 2;
	}
	if (ch <= 0xFFFF) {
	    if ((ch & 0xF800) == 0xD800) {
		if (ch & 0x0400) {
		    /* Low surrogate */
		    if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) {

			/* Previous Tcl_UniChar was a high surrogate, so combine */
			buf[2] = (char) ((ch & 0x3F) | 0x80);
			buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80);
			return 3;
		    }
		    /* Previous Tcl_UniChar was not a high surrogate, so just output */
		} else {
		    /* High surrogate */
		    ch += 0x40;
		    /* Fill buffer with specific 3-byte (invalid) byte combination,
		       so following low surrogate can recognize it and combine */
		    buf[2] = (char) ((ch << 4) & 0x30);
		    buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
		    buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
		    return 1;
		}
	    }
	    goto three;
	}
	if (ch <= 0x10FFFF) {
	    buf[3] = (char) ((ch | 0x80) & 0xBF);
	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
	    buf[0] = (char) ((ch >> 18) | 0xF0);
	    return 4;
	}
    } else if (ch == -1) {
	if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)
		&& ((buf[-1] & 0xF8) == 0xF0)) {
	    ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2)
		    + ((buf[1] & 0x30) >> 4);
	    buf[1] = (char) ((ch | 0x80) & 0xBF);
	    buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF);
	    buf[-1] = (char) ((ch >> 12) | 0xE0);
	    return 2;
	}
    }

    ch = 0xFFFD;
three:
    buf[2] = (char) ((ch | 0x80) & 0xBF);
    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
................................................................................
    register const char *src,	/* The UTF-8 string. */
    register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
				 * the UTF-8 string. */
{
    Tcl_UniChar byte;

    /*
     * Unroll 1 to 4 byte UTF-8 sequences.
     */

    byte = *((unsigned char *) src);
    if (byte < 0xC0) {
	/*
	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
	 * Treats naked trail bytes 0x80 to 0x9F as valid characters from
................................................................................
    }
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX <= 4
	    Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
		    | ((src[2] & 0x3F) >> 4)) - 0x40;
	    if (high >= 0x400) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce high surrogate, advance source pointer */
		*chPtr = 0xD800 + high;
		return 1;
	    }
#else
	    *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
	    if ((*chPtr - 0x10000) <= 0xFFFFF) {
		return 4;
................................................................................
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX <= 4
    if ((ch >= 0xD800) && (len < 3)) {
	/* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
	(void)TclUtfToUniChar(src, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}
 
/*
................................................................................

    while (index-- > 0) {
	len = TclUtfToUniChar(src, &ch);
	src += len;
    }
#if TCL_UTF_MAX <= 4
    if ((ch >= 0xD800) && (len < 3)) {
	/* Index points at character following high Surrogate */
	src += TclUtfToUniChar(src, &ch);
    }
#endif
    return src;
}
 
/*

Changes to generic/tclUtil.c.

1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
char
Tcl_Backslash(
    const char *src,		/* Points to the backslash character of a
				 * backslash sequence. */
    int *readPtr)		/* Fill in with number of characters read from
				 * src, unless NULL. */
{
    /*
     * Scratch buffer of TCL_UTF_MAX bytes that Tcl_UtfBackslash writes into.
     */

    char buf[TCL_UTF_MAX];
    Tcl_UniChar ch = 0;

    /*
     * Hand the sequence (and readPtr, unchanged) to Tcl_UtfBackslash, then
     * read one Tcl_UniChar back out of the start of buf.
     */

    Tcl_UtfBackslash(src, readPtr, buf);
    TclUtfToUniChar(buf, &ch);

    /*
     * The cast narrows the Tcl_UniChar to a single char, so values that do
     * not fit in a char cannot be returned faithfully.
     */

    return (char) ch;
}
#endif /* !TCL_NO_DEPRECATED */






|







1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
char
Tcl_Backslash(
    const char *src,		/* Points to the backslash character of a
				 * backslash sequence. */
    int *readPtr)		/* Fill in with number of characters read from
				 * src, unless NULL. */
{
    /*
     * Scratch buffer of TCL_UTF_MAX bytes that Tcl_UtfBackslash writes into.
     * The "" initializer zero-fills the whole array before first use.
     * NOTE(review): presumably this is needed because the surrogate handling
     * in Tcl_UniCharToUtf inspects bytes already present in the buffer --
     * confirm that Tcl_UtfBackslash reaches that code.
     */

    char buf[TCL_UTF_MAX] = "";
    Tcl_UniChar ch = 0;

    /*
     * Hand the sequence (and readPtr, unchanged) to Tcl_UtfBackslash, then
     * read one Tcl_UniChar back out of the start of buf.
     */

    Tcl_UtfBackslash(src, readPtr, buf);
    TclUtfToUniChar(buf, &ch);

    /*
     * The cast narrows the Tcl_UniChar to a single char, so values that do
     * not fit in a char cannot be returned faithfully.
     */

    return (char) ch;
}
#endif /* !TCL_NO_DEPRECATED */