Tcl Source Code

Check-in [f0adfe7dac]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:(partial) fix for 00a27923ee: text/entry dysfunctional when pasting an emoji on MacOSX. Don't handle incoming valid 4-byte UTF-8 characters as invalid byte sequences (since they aren't), but as being the Unicode replacement character.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-00a27923ee
Files: files | file ages | folders
SHA3-256: f0adfe7dacbc6c229043189a4508f8d6c2199ce27a111ddee74430f0ef00b069
User & Date: jan.nijtmans 2018-01-09 11:15:14
Context
2018-01-10
08:25
Fix 00a27923ee: (Tcl part, re... check-in: 8481a52495 user: jan.nijtmans tags: core-8-branch
2018-01-09
11:15
(partial) fix for 00a27923ee:... Closed-Leaf check-in: f0adfe7dac user: jan.nijtmans tags: bug-00a27923ee
2018-01-04
13:00
(cherry-pick): Use http 2 instead of http 1 for Safe Base testing. check-in: 116dad3ca7 user: jan.nijtmans tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclUtf.c.

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
...
330
331
332
333
334
335
336
337
338
339
340
341
342
343










344
345
346
347
348
349
350
...
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
...
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
...
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
    4,4,4,4,4,4,4,4,
#else
    1,1,1,1,1,1,1,1,
#endif
    1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */

................................................................................
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
#if TCL_UTF_MAX > 3
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX == 4










	    Tcl_UniChar surrogate;

	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */
................................................................................
	}

	/*
	 * A four-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
#endif

    *chPtr = (Tcl_UniChar) byte;
    return 1;
}
 
/*
 *---------------------------------------------------------------------------
................................................................................
    if (length < 0) {
	while (*src != '\0') {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (i < 0) i = INT_MAX; /* Bug [2738427] */
    } else {
	register const char *endPtr = src + length - TCL_UTF_MAX;

	while (src < endPtr) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	endPtr += TCL_UTF_MAX;
	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (src < endPtr) {
	    i += endPtr - src;
	}
................................................................................
    const char *start)		/* Pointer to the beginning of the string, to
				 * avoid going backwards too far. */
{
    const char *look;
    int i, byte;

    look = --src;
    for (i = 0; i < TCL_UTF_MAX; i++) {
	if (look < start) {
	    if (src < start) {
		src = start;
	    }
	    break;
	}
	byte = *((unsigned char *) look);






<

<
<
<







 







<





|
>
>
>
>
>
>
>
>
>
>







 







<







 







|





|







 







|







64
65
66
67
68
69
70

71



72
73
74
75
76
77
78
...
326
327
328
329
330
331
332

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
...
372
373
374
375
376
377
378

379
380
381
382
383
384
385
...
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
...
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,

    4,4,4,4,4,4,4,4,



    1,1,1,1,1,1,1,1
};

/*
 * Functions used only in this module.
 */

................................................................................
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }

    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
#if TCL_UTF_MAX == 3
	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce replacement character, and advance source pointer */
		*chPtr = (Tcl_UniChar) 0xFFFD;
		return 4;
	    }
#elif TCL_UTF_MAX == 4
	    Tcl_UniChar surrogate;

	    byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
		    | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
	    surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
	    if (byte & 0x100000) {
		/* out of range, < 0x10000 or > 0x10ffff */
................................................................................
	}

	/*
	 * A four-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }


    *chPtr = (Tcl_UniChar) byte;
    return 1;
}
 
/*
 *---------------------------------------------------------------------------
................................................................................
    if (length < 0) {
	while (*src != '\0') {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (i < 0) i = INT_MAX; /* Bug [2738427] */
    } else {
	register const char *endPtr = src + length - 4;

	while (src < endPtr) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	endPtr += 4;
	while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
	    src += TclUtfToUniChar(src, &ch);
	    i++;
	}
	if (src < endPtr) {
	    i += endPtr - src;
	}
................................................................................
    const char *start)		/* Pointer to the beginning of the string, to
				 * avoid going backwards too far. */
{
    const char *look;
    int i, byte;

    look = --src;
    for (i = 0; i < 4; i++) {
	if (look < start) {
	    if (src < start) {
		src = start;
	    }
	    break;
	}
	byte = *((unsigned char *) look);