Tk Source Code: Check-in [7f62ccc3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Various cleanups in Unicode handling. Note that without Xft on X11 we don't have Emoji.
Downloads:	Tarball \| ZIP archive
Timelines:	family \| ancestors \| descendants \| both \| core-8-6-branch
Files:	files \| file ages \| folders
SHA3-256:	7f62ccc3c60ebc55d27b6413a152af627822848cf7de16254394d29e117fa66b
User & Date:	jan.nijtmans 2019-11-26 20:33:36.240

Context

2019-11-26
20:35		Horrible Windows-specific workaround against SendInput not moving the mouse on Win10 (when both input.mi.dx and input.mi.dy are zero) - this must be a Windows bug. bind-34.3 now passes on Win10. check-in: 4202ddbd user: jan.nijtmans tags: core-8-6-branch
20:33		Various cleanups in Unicode handling. Note that without Xft on X11 we don't have Emoji. check-in: 7f62ccc3 user: jan.nijtmans tags: core-8-6-branch
2019-11-25
12:47		Fix [a953736b54]: Potential wrong use of TkKeyEvent check-in: be77d20c user: jan.nijtmans tags: core-8-6-branch
2019-11-22
08:48		Merge 8.6 Closed-Leaf check-in: b35590d9 user: jan.nijtmans tags: utf-16-for-X11

Changes

Changes to generic/tkEntry.c.

Changes to generic/tkUtil.c.

Changes to generic/ttk/ttkEntry.c.

Changes to library/demos/unicodeout.tcl.

Changes to unix/tkUnixFont.c.

Changes to win/tkWinFont.c.

Changes to win/tkWinKey.c.

Changes to win/tkWinX.c.

︙			︙
1212 1213 1214 1215 1216 1217 1218 ~~1219~~ 1220 1221 1222 1223 1224 ~~1225 1226~~ 1227 1228 ~~1229 1230 1231 1232 1233 1234~~ 1235 ~~1236 1237~~ 1238 1239 1240 1241 1242 1243 1244 1245	* --------------------------------------------------------------------------- / int TkUtfToUniChar( const char src, / The UTF-8 string. / ~~int chPtr) /* Filled with the ~~Tcl_~~Uni~~Char~~ represented by~~ * the UTF-8 string. / { Tcl_UniChar uniChar = 0; int len = Tcl_UtfToUniChar(src, &uniChar); ~~if ((uniChar & 0xfc00) == 0xd800) { Tcl_UniChar ~~high~~ = uniChar;~~ / This can only happen if Tcl is compiled with TCL_UTF_MAX=4, * or when a high surrogate character is detected in UTF-8 form / int len2 = Tcl_UtfToUniChar(src+len, &~~uniChar~~); if ((uniChar & 0xfc00) == 0xdc00) { chPtr = (((~~high & 0x3ff) << 10) \| (~~uniChar & 0x3ff)) + 0x10000; len += len2; ~~} else {~~ chPtr = high; } ~~~~} else {~~ chPtr = uniChar;~~ } return len; } /* --------------------------------------------------------------------------- * TkUniCharToUtf --	\| \| \| \| \| \| \| < < < < >	1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242	* --------------------------------------------------------------------------- / int TkUtfToUniChar( const char src, / The UTF-8 string. / int chPtr) /* Filled with the Unicode value represented by * the UTF-8 string. / { Tcl_UniChar uniChar = 0; int len = Tcl_UtfToUniChar(src, &uniChar); if ((uniChar & 0xFC00) == 0xD800) { Tcl_UniChar low = uniChar; / This can only happen if Tcl is compiled with TCL_UTF_MAX=4, * or when a high surrogate character is detected in UTF-8 form / int len2 = Tcl_UtfToUniChar(src+len, &low); if ((uniChar & 0xFC00) == 0xDC00) { chPtr = (((uniChar & 0x3FF) << 10) \| (low & 0x3FF)) + 0x10000; return len + len2; } } chPtr = uniChar; return len; } / --------------------------------------------------------------------------- * TkUniCharToUtf --
︙			︙
1255 1256 1257 1258 1259 1260 1261 ~~1262 1263 1264 1265 1266 1267 1268 1269 1270~~ 1271 ~~1272~~ 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283	* None. * --------------------------------------------------------------------------- / int TkUniCharToUtf(int ch, char buf) { ~~int size = Tcl_UniCharToUtf(ch, buf);~~ if ((((unsigned)(ch - 0x10000) <= 0xFFFFF~~)) && (size < 4~~)) { ~~/ Hey, this is wrong, we must be running TCL_UTF_MAX==3~~ * The best thing we can do is spit out a 4-byte UTF-8 character / buf~~[3]~~ = (char) ((ch \| 0x~~80) & 0xBF~~); buf~~[2]~~ = (char) (((ch >> 6) \| 0x80) & 0xBF); buf~~[1]~~ = (char) (((ch >> 12) \| 0x80) & 0xBF); ~~buf[0]~~ = (char) ((ch ~~>> 18)~~ \| 0xF0); ~~siz~~e = 4; } ~~return size;~~ } #endif / * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */	< \| < \| \| \| \| \| \| > > <	1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279	* None. * --------------------------------------------------------------------------- / int TkUniCharToUtf(int ch, char buf) { if (((unsigned)(ch - 0x10000) <= 0xFFFFF)) { / Spit out a 4-byte UTF-8 character / buf++ = (char) ((ch >> 18) \| 0xF0); buf++ = (char) (((ch >> 12) \| 0x80) & 0xBF); buf++ = (char) (((ch >> 6) \| 0x80) & 0xBF); buf = (char) ((ch \| 0x80) & 0xBF); return 4; } else { return Tcl_UniCharToUtf(ch, buf); } } #endif / * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */

︙			︙
8 9 10 11 12 13 14 ~~15 16~~ 17 18 19 20 21 22 23	* * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. / #include "tkUnixInt.h" #include "tkFont.h" ~~#include <netinet/in.h> / for htons() prototype /~~ ~~#include <arpa/inet.h> / inet_ntoa() /~~ / * The preferred font encodings. / static const char const encodingList[] = { "iso8859-1", "jis0208", "jis0212", NULL	< <	8 9 10 11 12 13 14 15 16 17 18 19 20 21	* * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. / #include "tkUnixInt.h" #include "tkFont.h" / * The preferred font encodings. / static const char const encodingList[] = { "iso8859-1", "jis0208", "jis0212", NULL
︙			︙
483 484 485 486 487 488 489 ~~490 491~~ 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 ~~510~~ 511 ~~512~~ 513 514 515 516 517 518 ~~519 520~~ 521 ~~522~~ ~~523 524~~ 525 526 527 528 529 530 531	* stored in the output buffer as a result of * the conversion. / int dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. / { const char srcStart, srcEnd; ~~char dstEnd, dstStart; int result, numChars;~~ result = TCL_OK; / check alignment with ucs-2 (2 == sizeof(UCS-2)) / if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } / If last code point is a high surrogate, we cannot handle that yet / if ((srcLen >= 2) && ((src[srcLen - 2] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen -= 2; } srcStart = src; srcEnd = src + srcLen; dstStart = dst; ~~dstEnd = dst + dstLen - ~~TCL_UTF_MAX~~;~~ ~~for (numChars = 0; src < srcEnd; numChars++) {~~ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } / * Need to swap byte-order on little-endian machines (x86) for * UCS-2BE. We know this is an LE->BE swap. / ~~dst += Tcl_UniCharToUtf(~~htons(((short )src))~~, dst); ~~src += 2 / sizeof(UCS-2) /;~~~~ } srcReadPtr = src - srcStart; dstWrotePtr = dst - dstStart; dstCharsPtr = numChars; return result; }	\| \| > > > > \| \| > > > < < > > \| > > \| < >	481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538	* stored in the output buffer as a result of * the conversion. / int dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
/ { const char srcStart, srcEnd; const char dstEnd, dstStart; int result, numChars, charLimit = INT_MAX; unsigned short ch; if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = dstCharsPtr; } result = TCL_OK; /* check alignment with ucs-2 (2 == sizeof(UCS-2)) / if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } / If last code point is a high surrogate, we cannot handle that yet / if ((srcLen >= 2) && ((src[srcLen - 2] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen -= 2; } srcStart = src; srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - 4; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } ch = (src[0] & 0xFF) << 8 \| (src[1] & 0xFF); src += 2 / sizeof(UTF-16) /; / * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. / if (ch && ch < 0x80) { dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } } srcReadPtr = src - srcStart; dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; }
︙			︙
572 573 574 575 576 577 578 ~~579 580 581 582 583~~ 584 585 586 587 588 ~~589~~ 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 ~~609~~ 610 611 612 ~~613 614~~ 615 616 ~~617 618 619~~ 620 621 622 623 624 625 626	* the conversion. / int dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. / { const char srcStart, srcEnd, srcClose, dstStart, dstEnd; int result, numChars; ~~~~Tcl_UniChar chPtr = (Tcl_UniChar )statePtr;~~ ~~if (flags & TCL_ENCODING_START) {~~ statePtr = 0; }~~ srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if (!(flags & TCL_ENCODING_END)) { ~~srcClose -= ~~TCL_UTF_MAX~~;~~ } dstStart = dst; dstEnd = dst + dstLen - 2 / sizeof(UCS-2) /; result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { / * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. / result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } ~~src += T~~cl_~~UtfToUniChar(src, ~~chPtr~~);~~ / * Ensure big-endianness (store big bits first). * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. Make * sure to work in char* for Tcl_UtfToUniChar alignment. [Bug 1122671] / dst++ = (char)(chPtr >> 8); dst++ = (char)chPtr; } srcReadPtr = src - srcStart; dstWrotePtr = dst - dstStart; dstCharsPtr = numChars; return result; }	< \| < < < \| \| < < < \| \|	579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626	* the conversion. / int dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
/ { const char srcStart, srcEnd, srcClose, dstStart, dstEnd; int result, numChars; int ch; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if (!(flags & TCL_ENCODING_END)) { srcClose -= 6; } dstStart = dst; dstEnd = dst + dstLen - 2 /* sizeof(UCS-2) /; result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { / * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. / result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } src += TkUtfToUniChar(src, &ch); / * Ensure big-endianness (store big bits first). / dst++ = (char)(ch >> 8); dst++ = (char)ch; } srcReadPtr = src - srcStart; dstWrotePtr = dst - dstStart; dstCharsPtr = numChars; return result; }
︙			︙