Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix 00a27923ee (Tcl part; the remainder is in Tk): text/entry dysfunctional when pasting an emoji on MacOSX. This changes the handling of incoming valid 4-byte UTF-8 sequences: those are no longer split into 4 separate characters (as was done for invalid byte sequences) but are replaced by a single 'replacement character' (U+FFFD). |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | core-8-branch |
Files: | files | file ages | folders |
SHA3-256: |
8481a52495dbecc1976edc52825e2cb8 |
User & Date: | jan.nijtmans 2018-01-10 08:25:13.679 |
Context
2018-01-10
| ||
14:02 | Re-implement Tcl_WinTCharToUtf/Tcl_WinUtfToTChar in pure win32 api, even for TCL_UTF_MAX=3. We can d... check-in: a2c5eee57d user: jan.nijtmans tags: core-8-branch | |
08:27 | merge core-8-branch check-in: b3fc2fbe3d user: jan.nijtmans tags: tip-389 | |
08:25 | Fix 00a27923ee: (Tcl part, re... check-in: 8481a52495 user: jan.nijtmans tags: core-8-branch | |
2018-01-09
| ||
11:15 | (partial) fix for 00a27923ee:... Closed-Leaf check-in: f0adfe7dac user: jan.nijtmans tags: bug-00a27923ee | |
00:10 | Some refactoring and tidying up of comments. check-in: 85aedb56e8 user: dkf tags: core-8-branch | |
Changes
Changes to generic/tclUtf.c.
︙ | ︙ | |||
64 65 66 67 68 69 70 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, | < < < < | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 1,1,1,1,1,1,1,1 }; /* *--------------------------------------------------------------------------- * * TclUtfCount -- |
︙ | ︙ | |||
324 325 326 327 328 329 330 | } /* * A three-byte-character lead-byte not followed by two trail-bytes * represents itself. */ } | < | > > > > > > > > > > | 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 | } /* * A three-byte-character lead-byte not followed by two trail-bytes * represents itself. */ } else if (byte < 0xF8) { if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) { /* * Four-byte-character lead byte followed by three trail bytes. */ #if TCL_UTF_MAX == 3 byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; if (byte & 0x100000) { /* out of range, < 0x10000 or > 0x10ffff */ } else { /* produce replacement character, and advance source pointer */ *chPtr = (Tcl_UniChar) 0xFFFD; return 4; } #elif TCL_UTF_MAX == 4 Tcl_UniChar surrogate; byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10)); if (byte & 0x100000) { /* out of range, < 0x10000 or > 0x10ffff */ |
︙ | ︙ | |||
361 362 363 364 365 366 367 | } /* * A four-byte-character lead-byte not followed by two trail-bytes * represents itself. */ } | < | 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | } /* * A four-byte-character lead-byte not followed by two trail-bytes * represents itself. */ } *chPtr = (Tcl_UniChar) byte; return 1; } /* *--------------------------------------------------------------------------- |
︙ | ︙ | |||
495 496 497 498 499 500 501 | if (length < 0) { while (*src != '\0') { src += TclUtfToUniChar(src, &ch); i++; } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { | | | | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | if (length < 0) { while (*src != '\0') { src += TclUtfToUniChar(src, &ch); i++; } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { register const char *endPtr = src + length - 4; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); i++; } endPtr += 4; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { src += TclUtfToUniChar(src, &ch); i++; } if (src < endPtr) { i += endPtr - src; } |
︙ | ︙ | |||
673 674 675 676 677 678 679 | const char *start) /* Pointer to the beginning of the string, to * avoid going backwards too far. */ { const char *look; int i, byte; look = --src; | | | 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | const char *start) /* Pointer to the beginning of the string, to * avoid going backwards too far. */ { const char *look; int i, byte; look = --src; for (i = 0; i < 4; i++) { if (look < start) { if (src < start) { src = start; } break; } byte = *((unsigned char *) look); |
︙ | ︙ |