Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Merge 8.7 |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk | main |
Files: | files | file ages | folders |
SHA3-256: |
702b3410f77b9e3a4bfcb2d069468b57 |
User & Date: | jan.nijtmans 2021-03-18 09:13:22.739 |
Context
2021-03-18
| ||
09:52 | Merge 8.7 check-in: 587db5d2a3 user: jan.nijtmans tags: trunk, main | |
09:13 | Merge 8.7 check-in: 702b3410f7 user: jan.nijtmans tags: trunk, main | |
09:12 | More simplification of UtfToUtfProc(). Get rid of wrapper functions UtfIntToUtfExtProc/UtfExtToUtfIn... check-in: 5e32efda50 user: jan.nijtmans tags: core-8-branch | |
2021-03-17
| ||
15:05 | Merge 8.7 check-in: 341e1e54c4 user: jan.nijtmans tags: trunk, main | |
Changes
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
216 217 218 219 220 221 222 | static Tcl_EncodingFreeProc TableFreeProc; static Tcl_EncodingConvertProc TableFromUtfProc; static Tcl_EncodingConvertProc TableToUtfProc; static size_t unilen(const char *src); static Tcl_EncodingConvertProc Utf16ToUtfProc; static Tcl_EncodingConvertProc UtfToUtf16Proc; static Tcl_EncodingConvertProc UtfToUcs2Proc; | < < < < < < | < | 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 | static Tcl_EncodingFreeProc TableFreeProc; static Tcl_EncodingConvertProc TableFromUtfProc; static Tcl_EncodingConvertProc TableToUtfProc; static size_t unilen(const char *src); static Tcl_EncodingConvertProc Utf16ToUtfProc; static Tcl_EncodingConvertProc UtfToUtf16Proc; static Tcl_EncodingConvertProc UtfToUcs2Proc; static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; /* * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field * of the intrep. This should help the lifetime of encodings be more useful. * See concerns raised in [Bug 1077262]. |
︙ | ︙ | |||
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 | * * Side effects: * Depends on the memory, object, and IO subsystems. * *--------------------------------------------------------------------------- */ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; TableEncodingData *dataPtr; unsigned size; unsigned short i; union { | > > > > | | | | | | | 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 | * * Side effects: * Depends on the memory, object, and IO subsystems. * *--------------------------------------------------------------------------- */ /* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ #define TCL_ENCODING_LE 0x40 /* Little-endian encoding */ #define TCL_ENCODING_EXTERNAL 0x80 /* Converting from internal to external variant */ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; TableEncodingData *dataPtr; unsigned size; unsigned short i; union { unsigned char c; short s; } isLe; if (encodingsInitialized) { return; } isLe.s = TCL_ENCODING_LE; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* * Create a few initial encodings. UTF-8 to UTF-8 translation is not a * no-op because it turns a stream of improperly formed UTF-8 into a * properly formed stream. */ type.encodingName = NULL; type.toUtfProc = BinaryProc; type.fromUtfProc = BinaryProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; tclIdentityEncoding = Tcl_CreateEncoding(&type); type.encodingName = "utf-8"; type.toUtfProc = UtfToUtfProc; type.fromUtfProc = UtfToUtfProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); |
︙ | ︙ | |||
1265 1266 1267 1268 1269 1270 1271 | srcLen = 0; } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } flags = TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, | | | 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 | srcLen = 0; } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } flags = TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags | TCL_ENCODING_EXTERNAL, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); if (result != TCL_CONVERT_NOSPACE) { if (encodingPtr->nullSize == 2) { Tcl_DStringSetLength(dstPtr, soFar + 1); } |
︙ | ︙ | |||
1367 1368 1369 1370 1371 1372 1373 | } if (dstCharsPtr == NULL) { dstCharsPtr = &dstChars; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, | | | 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 | } if (dstCharsPtr == NULL) { dstCharsPtr = &dstChars; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags | TCL_ENCODING_EXTERNAL, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (encodingPtr->nullSize == 2) { dst[*dstWrotePtr + 1] = '\0'; } dst[*dstWrotePtr] = '\0'; return result; |
︙ | ︙ | |||
2090 2091 2092 2093 2094 2095 2096 | memcpy(dst, src, srcLen); return result; } /* *------------------------------------------------------------------------- * | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 | memcpy(dst, src, srcLen); return result; } /* *------------------------------------------------------------------------- * * UtfToUtfProc -- * * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation * is not a no-op, because it will turn a stream of improperly formed * UTF-8 into a properly formed stream. * * Results: |
︙ | ︙ | |||
2222 2223 2224 2225 2226 2227 2228 | * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ | | < < < | 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 | * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; result = TCL_OK; |
︙ | ︙ | |||
2263 2264 2265 2266 2267 2268 2269 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } | | | | | 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. */ *dst++ = *src++; } else if (UCHAR(*src) == 0xC0 && (src + 1 < srcEnd) && UCHAR(src[1]) == 0x80 && (flags & TCL_ENCODING_EXTERNAL)) { /* * Convert 0xC080 to real nulls when we are in output mode. */ *dst++ = 0; src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { |
︙ | ︙ | |||
2332 2333 2334 2335 2336 2337 2338 | * None. * *------------------------------------------------------------------------- */ static int Utf16ToUtfProc( | | | 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 | * None. * *------------------------------------------------------------------------- */ static int Utf16ToUtfProc( ClientData clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 | * output buffer. */ { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; unsigned short ch; if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } result = TCL_OK; /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } /* If last code point is a high surrogate, we cannot handle that yet */ | > | | | 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 | * output buffer. */ { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; unsigned short ch; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } result = TCL_OK; /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } /* If last code point is a high surrogate, we cannot handle that yet */ if ((srcLen >= 2) && ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; } srcStart = src; srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (flags & TCL_ENCODING_LE) { ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } /* * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. |
︙ | ︙ | |||
2524 2525 2526 2527 2528 2529 2530 | * None. * *------------------------------------------------------------------------- */ static int UtfToUcs2Proc( | | | 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 | * None. * *------------------------------------------------------------------------- */ static int UtfToUcs2Proc( ClientData clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 | const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; #if TCL_UTF_MAX <= 3 int len; #endif Tcl_UniChar ch = 0; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } | > | 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 | const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; #if TCL_UTF_MAX <= 3 int len; #endif Tcl_UniChar ch = 0; flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } |
︙ | ︙ | |||
2595 2596 2597 2598 2599 2600 2601 | #endif /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ | | | 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 | #endif /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ if (flags & TCL_ENCODING_LE) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); } } |
︙ | ︙ |