Tcl Source Code

Check-in [f6eb4196ee]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Attempt to fix a179564826: Tk 8.6: prevent issues when encountering non-BMP Unicode characters
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-a179564826
Files: files | file ages | folders
SHA3-256: f6eb4196ee4d6fa810c96907618d22d5c320c5814c77a438a15935cfff39a13b
User & Date: jan.nijtmans 2019-08-01 08:02:38
Context
2019-09-12
07:51
Fix Tcl part of a179564826: Tk 8.6: prevent i... check-in: a7a44cf3ca user: jan.nijtmans tags: core-8-6-branch
2019-08-01
08:02
Attempt to fix a179564826: Tk 8.6: prevent is... Closed-Leaf check-in: f6eb4196ee user: jan.nijtmans tags: bug-a179564826
07:52
Mark one more test-case as knownMsvcBug. TODO: something to be fixed here? See: [https://travis-ci.o... check-in: 0a54b276c1 user: jan.nijtmans tags: core-8-6-branch
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/tclUtf.c.
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
    4,4,4,4,4,
#else
    1,1,1,1,1,
#endif
    1,1,1,1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */







|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
    4,4,4,4,4,
#else
    3,3,3,3,3, /* Tcl_UtfCharComplete() only checks TCL_UTF_MAX bytes */
#endif
    1,1,1,1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
    if (byte < 0xC0) {
	/*
	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
	 * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
	 * characters representing themselves.
	 */

#if TCL_UTF_MAX == 4
	/* If *chPtr contains a high surrogate (produced by a previous
	 * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
	 * bytes, then we must produce a follow-up low surrogate. We only
	 * do that if the high surrogate matches the bits we encounter.
	 */
	if ((byte >= 0x80)
		&& (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))







|







310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
    if (byte < 0xC0) {
	/*
	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
	 * Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
	 * characters representing themselves.
	 */

#if TCL_UTF_MAX <= 4
	/* If *chPtr contains a high surrogate (produced by a previous
	 * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
	 * bytes, then we must produce a follow-up low surrogate. We only
	 * do that if the high surrogate matches the bits we encounter.
	 */
	if ((byte >= 0x80)
		&& (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
#if TCL_UTF_MAX > 3
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX == 4
	    Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
		    | ((src[2] & 0x3F) >> 4)) - 0x40;
	    if (high >= 0x400) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce high surrogate, advance source pointer */
		*chPtr = 0xD800 + high;







<





|







360
361
362
363
364
365
366

367
368
369
370
371
372
373
374
375
376
377
378
379
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }

    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX <= 4
	    Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
		    | ((src[2] & 0x3F) >> 4)) - 0x40;
	    if (high >= 0x400) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce high surrogate, advance source pointer */
		*chPtr = 0xD800 + high;
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
	}

	/*
	 * A four-byte-character lead-byte not followed by three trail-bytes
	 * represents itself.
	 */
    }
#endif

    *chPtr = byte;
    return 1;
}

/*
 *---------------------------------------------------------------------------







<







389
390
391
392
393
394
395

396
397
398
399
400
401
402
	}

	/*
	 * A four-byte-character lead-byte not followed by three trail-bytes
	 * represents itself.
	 */
    }


    *chPtr = byte;
    return 1;
}

/*
 *---------------------------------------------------------------------------