Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Merge 8.7 |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk | main |
Files: | files | file ages | folders |
SHA3-256: |
341e1e54c4dfdf0c21bce7b2a99d146c |
User & Date: | jan.nijtmans 2021-03-17 15:05:02.324 |
Context
2021-03-18
| ||
09:13 | Merge 8.7 check-in: 702b3410f7 user: jan.nijtmans tags: trunk, main | |
2021-03-17
| ||
15:05 | Merge 8.7 check-in: 341e1e54c4 user: jan.nijtmans tags: trunk, main | |
15:01 | Simplify UtfToUtfProc() and UtfToUtf16Proc(): Using TclUtfToUCS4() internally means that we don't ha... check-in: 357301b2a8 user: jan.nijtmans tags: core-8-branch | |
09:07 | Merge 8.7 check-in: f4fc2b9a0d user: jan.nijtmans tags: trunk, main | |
Changes
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
2209 2210 2211 2212 2213 2214 2215 | static int UtfToUtfProc( TCL_UNUSED(ClientData), const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ | | < < < < | 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 | static int UtfToUtfProc( TCL_UNUSED(ClientData), const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may * be less than the original source length if |
︙ | ︙ | |||
2236 2237 2238 2239 2240 2241 2242 | int pureNullMode) /* Convert embedded nulls from internal * representation to real null-bytes or vice * versa. Also combine or separate surrogate pairs */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; | < | < < | | 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 | int pureNullMode) /* Convert embedded nulls from internal * representation to real null-bytes or vice * versa. Also combine or separate surrogate pairs */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; result = TCL_OK; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; |
︙ | ︙ | |||
2292 2293 2294 2295 2296 2297 2298 | } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves. */ | | | | | | | | | | | | | | 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 | } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves. */ ch = UCHAR(*src); src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { src += TclUtfToUCS4(src, &ch); if ((ch | 0x7FF) == 0xDFFF) { /* A surrogate character is detected, handle especially */ int low = ch; size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF); *dst++ = (char) ((ch | 0x80) & 0xBF); continue; } src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; } dst += Tcl_UniCharToUtf(ch, dst); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; |
︙ | ︙ | |||
2438 2439 2440 2441 2442 2443 2444 | static int UtfToUtf16Proc( ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ | | < < < < < | < < | | 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 | static int UtfToUtf16Proc( ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; int ch; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } |
︙ | ︙ | |||
2491 2492 2493 2494 2495 2496 2497 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } | | < < | | | | | | | < < < < < | | | | | | | < < < < | 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } src += TclUtfToUCS4(src, &ch); if (clientData) { if (ch <= 0xFFFF) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (ch & 0xFF); *dst++ = ((ch >> 8) & 0x3) | 0xDC; } } else { if (ch <= 0xFFFF) { *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); } else { *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((ch - 0x10000) >> 10) & 0xFF); *dst++ = ((ch >> 8) & 0x3) | 0xDC; *dst++ = (ch & 0xFF); } } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } |
︙ | ︙ |