Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch tip-547 Excluding Merge-Ins
This is equivalent to a diff from 34ea059cbf to 25074a8c8b
2019-06-28
| ||
22:36 | Implement TIP #547: New encodings: UTF-16, UCS-2 check-in: 2cef9a0691 user: jan.nijtmans tags: core-8-branch | |
2019-06-27
| ||
13:40 | Fix [15d851e394]: Fix spelling in comment of tclCmdIL.c check-in: e138330fe5 user: jan.nijtmans tags: core-8-branch | |
13:08 | merge 8.7 check-in: a1193f802a user: dgp tags: core-8-7-a3-rc | |
2019-06-26
| ||
20:08 | Merge tip-547 check-in: 6f4e263391 user: jan.nijtmans tags: tip-548 | |
20:06 | Merge 8.7 Closed-Leaf check-in: 25074a8c8b user: jan.nijtmans tags: tip-547 | |
08:34 | Merge 8.7 check-in: 7723dac835 user: jan.nijtmans tags: trunk | |
08:32 | Merge 8.6 check-in: 34ea059cbf user: jan.nijtmans tags: core-8-branch | |
08:27 | UNEXEC win/tclWinFile.c check-in: eaf145bcd7 user: jan.nijtmans tags: core-8-6-branch | |
2019-06-25
| ||
22:32 | Merge 8.6. Also fix mp_get_long_long for VC++ 6.0 (will be deprecated by libtommath) check-in: f4b325d680 user: jan.nijtmans tags: core-8-branch | |
2019-06-16
| ||
11:37 | Merge 8.7 check-in: f9c29c1321 user: jan.nijtmans tags: tip-547 | |
Changes to generic/tclEncoding.c.
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 ... 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 ... 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 .... 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 .... 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 .... 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 .... 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 .... 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 |
int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int TableToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); static int UniCharToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUniCharProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, ................................................................................ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; TableEncodingData *dataPtr; unsigned size; unsigned short i; if (encodingsInitialized) { return; } Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* * Create a few initial encodings. Note that the UTF-8 to UTF-8 * translation is not a no-op, because it will turn a stream of improperly ................................................................................ type.toUtfProc = UtfExtToUtfIntProc; type.fromUtfProc = UtfIntToUtfExtProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "unicode"; type.toUtfProc = UniCharToUtfProc; type.fromUtfProc = UtfToUniCharProc; type.freeProc = NULL; type.nullSize = 2; type.clientData = NULL; Tcl_CreateEncoding(&type); /* * Need the iso8859-1 encoding in order to process binary data, so force * it to always be embedded. Note that this encoding *must* be a proper * table encoding or some of the escape encodings crash! Hence the ugly * code to duplicate the structure of a table encoding here. */ ................................................................................ result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (*dstCharsPtr <= maxChars) { break; } dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX; flags = savedFlags; *statePtr = savedState; } while (1); if (!noTerminate) { /* ...and then append it */ dst[*dstWrotePtr] = '\0'; ................................................................................ *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * UniCharToUtfProc -- * * Convert from Unicode to UTF-8. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */ static int UniCharToUtfProc( ClientData clientData, /* Not used. */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion ................................................................................ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } /* * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. */ ch = *(unsigned short *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } ................................................................................ *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * UtfToUniCharProc -- * * Convert from UTF-8 to Unicode. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */ static int UtfToUniCharProc( ClientData clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion ................................................................................ src += TclUtfToUniChar(src, chPtr); /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ #ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 4 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); } else { *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; *dst++ = (*chPtr & 0xFF); *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); } #else *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); #endif #else #if TCL_UTF_MAX > 4 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); } else { *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (*chPtr & 0xFF); *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; } #else *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); #endif #endif } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * TableToUtfProc -- * * Convert from the encoding specified by the TableEncodingData into * UTF-8. |
| > > > > > | > > > > > > > > > > > > > > > | > > > | | > | > > > > > > > > > > > | | | | | > > > > > < < | | | | < | | | | | | < | > | | | | | | | | | | < | > | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | |
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 ... 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 ... 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 .... 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 .... 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 .... 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 .... 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 .... 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 |
int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int TableToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); static int Utf16ToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUtf16Proc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUcs2Proc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static int UtfToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, ................................................................................ void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; TableEncodingData *dataPtr; unsigned size; unsigned short i; union { char c; short s; } isLe; if (encodingsInitialized) { return; } isLe.s = 1; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* * Create a few initial encodings. Note that the UTF-8 to UTF-8 * translation is not a no-op, because it will turn a stream of improperly ................................................................................ type.toUtfProc = UtfExtToUtfIntProc; type.fromUtfProc = UtfIntToUtfExtProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; type.clientData = INT2PTR(1); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; type.clientData = INT2PTR(1);; Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; type.clientData = INT2PTR(isLe.c);; Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED type.encodingName = "unicode"; Tcl_CreateEncoding(&type); #endif /* * Need the iso8859-1 encoding in order to process binary data, so force * it to always be embedded. Note that this encoding *must* be a proper * table encoding or some of the escape encodings crash! Hence the ugly * code to duplicate the structure of a table encoding here. */ ................................................................................ result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (*dstCharsPtr <= maxChars) { break; } dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); flags = savedFlags; *statePtr = savedState; } while (1); if (!noTerminate) { /* ...and then append it */ dst[*dstWrotePtr] = '\0'; ................................................................................ *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * Utf16ToUtfProc -- * * Convert from UTF-16 to UTF-8. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */ static int Utf16ToUtfProc( ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion ................................................................................ for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (clientData) { ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } /* * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. */ if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } ................................................................................ *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * UtfToUtf16Proc -- * * Convert from UTF-8 to UTF-16. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */ static int UtfToUtf16Proc( ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion ................................................................................ src += TclUtfToUniChar(src, chPtr); /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ if (clientData) { #if TCL_UTF_MAX > 4 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); } else { *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (*chPtr & 0xFF); *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; } #else *dst++ = (*chPtr & 0xFF); *dst++ = (*chPtr >> 8); #endif } else { #if TCL_UTF_MAX > 4 if (*chPtr <= 0xFFFF) { *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); } else { *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; *dst++ = (*chPtr & 0xFF); *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); } #else *dst++ = (*chPtr >> 8); *dst++ = (*chPtr & 0xFF); #endif } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * UtfToUcs2Proc -- * * Convert from UTF-8 to UCS-2. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */ static int UtfToUcs2Proc( ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; #if TCL_UTF_MAX <= 4 int len; #endif Tcl_UniChar ch = 0; srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } dstStart = dst; dstEnd = dst + dstLen - sizeof(Tcl_UniChar); result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the * last UTF-8 character in the source buffer hasn't been cut off. */ result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } #if TCL_UTF_MAX <= 4 src += (len = TclUtfToUniChar(src, &ch)); if ((ch >= 0xD800) && (len < 3)) { src += TclUtfToUniChar(src, &ch); ch = 0xFFFD; } #else src += TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { ch = 0xFFFD; } #endif /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ if (clientData) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } /* *------------------------------------------------------------------------- * * TableToUtfProc -- * * Convert from the encoding specified by the TableEncodingData into * UTF-8. |
Changes to tests/binary.test.
2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 |
apply {{a b} {
set one [binary format H* $a]
return $one[binary format H* $b]
}} ab cd
} [binary format H* abcd]
test binary-78.1 {unicode (out of BMP) to byte-array conversion, bug-[bd94500678]} -body {
# just test for BO-segfault (high surrogate w/o advance source pointer for out of BMP char if TCL_UTF_MAX <= 4):
binary encode hex \U0001f415
binary scan \U0001f415 a* v; set v
set str {}
} -result {}
# ----------------------------------------------------------------------
# cleanup
|
| |
2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 |
apply {{a b} {
set one [binary format H* $a]
return $one[binary format H* $b]
}} ab cd
} [binary format H* abcd]
test binary-78.1 {unicode (out of BMP) to byte-array conversion, bug-[bd94500678]} -body {
# just test for BO-segfault (high surrogate w/o advance source pointer for out of BMP char if TCL_UTF_MAX == 3):
binary encode hex \U0001f415
binary scan \U0001f415 a* v; set v
set str {}
} -result {}
# ----------------------------------------------------------------------
# cleanup
|
Changes to tests/chanio.test.
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
....
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
|
} -result {bbbbbbbbbbbbbbb 15 123456789abcdef 1 4 abcd 0 3 efg} test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} -setup { set x "" } -constraints {stdio testchannel openpipe fileevent} -body { # Tcl_ExternalToUtf() set f [openpipe w+ $path(cat)] chan configure $f -translation {auto lf} -buffering none chan configure $f -encoding unicode chan puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r" chan configure $f -buffersize 16 chan gets $f chan configure $f -blocking 0 lappend x [chan gets $f line] $line [testchannel queuedcr $f] chan configure $f -blocking 1 chan puts -nonewline $f "\nabcd\refg" ................................................................................ # not (bufPtr->nextPtr == NULL) set f [openpipe w+ $path(cat)] chan configure $f -translation lf -encoding ascii -buffering none chan puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz" chan event $f read [namespace code { lappend x [chan gets $f line] $line [testchannel inputbuffered $f] }] chan configure $f -encoding unicode -buffersize 16 -blocking 0 vwait [namespace which -variable x] chan configure $f -translation auto -encoding ascii -blocking 1 # here vwait [namespace which -variable x] return $x } -cleanup { chan close $f |
|
|
|
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
....
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
|
} -result {bbbbbbbbbbbbbbb 15 123456789abcdef 1 4 abcd 0 3 efg} test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} -setup { set x "" } -constraints {stdio testchannel openpipe fileevent} -body { # Tcl_ExternalToUtf() set f [openpipe w+ $path(cat)] chan configure $f -translation {auto lf} -buffering none chan configure $f -encoding utf-16 chan puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r" chan configure $f -buffersize 16 chan gets $f chan configure $f -blocking 0 lappend x [chan gets $f line] $line [testchannel queuedcr $f] chan configure $f -blocking 1 chan puts -nonewline $f "\nabcd\refg" ................................................................................ # not (bufPtr->nextPtr == NULL) set f [openpipe w+ $path(cat)] chan configure $f -translation lf -encoding ascii -buffering none chan puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz" chan event $f read [namespace code { lappend x [chan gets $f line] $line [testchannel inputbuffered $f] }] chan configure $f -encoding utf-16 -buffersize 16 -blocking 0 vwait [namespace which -variable x] chan configure $f -translation auto -encoding ascii -blocking 1 # here vwait [namespace which -variable x] return $x } -cleanup { chan close $f |
Changes to tests/encoding.test.
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 |
} 00 test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]] binary scan [teststringbytes $y] H* z set z } c080 test encoding-16.1 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode NN] list $val [format %x [scan $val %c]] } -result "\u4e4e 4e4e" test encoding-16.2 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xd8\xd8\xdc\xdc"] list $val [format %x [scan $val %c]] } -result "\U460dc 460dc" test encoding-17.1 {UtfToUnicodeProc} -body { encoding convertto unicode "\U460dc" } -result "\xd8\xd8\xdc\xdc" test encoding-18.1 {TableToUtfProc} { } {} test encoding-19.1 {TableFromUtfProc} { } {} |
| | | > > > > | > > > > | | > > > |
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 |
} 00 test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]] binary scan [teststringbytes $y] H* z set z } c080 test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] list $val [format %x [scan $val %c]] } -result "\u4e4e 4e4e" test encoding-16.2 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 "\xd8\xd8\xdc\xdc"] list $val [format %x [scan $val %c]] } -result "\U460dc 460dc" test encoding-16.3 {Ucs2ToUtfProc} -body { set val [encoding convertfrom ucs-2 NN] list $val [format %x [scan $val %c]] } -result "\u4e4e 4e4e" test encoding-16.4 {Ucs2ToUtfProc} -body { set val [encoding convertfrom ucs-2 "\xd8\xd8\xdc\xdc"] list $val [format %x [scan $val %c]] } -result "\U460dc 460dc" test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460dc" } -result "\xd8\xd8\xdc\xdc" test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460dc"] } -result "\ufffd" test encoding-18.1 {TableToUtfProc} { } {} test encoding-19.1 {TableFromUtfProc} { } {} |
Changes to tests/io.test.
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
....
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
|
set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] fconfigure $f -translation {auto lf} -buffering none fconfigure $f -encoding unicode puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r" fconfigure $f -buffersize 16 gets $f fconfigure $f -blocking 0 set x [list [gets $f line] $line [testchannel queuedcr $f]] fconfigure $f -blocking 1 puts -nonewline $f "\nabcd\refg" ................................................................................ puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz" variable x {} fileevent $f read [namespace code "ready $f"] proc ready {f} { variable x lappend x [gets $f line] $line [testchannel inputbuffered $f] } fconfigure $f -encoding unicode -buffersize 16 -blocking 0 vwait [namespace which -variable x] fconfigure $f -translation auto -encoding ascii -blocking 1 # here vwait [namespace which -variable x] close $f set x } [list -1 "" 42 15 "123456789012345" 25] |
|
|
|
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
....
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
|
set x } [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"] test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} { # Tcl_ExternalToUtf() set f [open "|[list [interpreter] $path(cat)]" w+] fconfigure $f -translation {auto lf} -buffering none fconfigure $f -encoding utf-16 puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r" fconfigure $f -buffersize 16 gets $f fconfigure $f -blocking 0 set x [list [gets $f line] $line [testchannel queuedcr $f]] fconfigure $f -blocking 1 puts -nonewline $f "\nabcd\refg" ................................................................................ puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz" variable x {} fileevent $f read [namespace code "ready $f"] proc ready {f} { variable x lappend x [gets $f line] $line [testchannel inputbuffered $f] } fconfigure $f -encoding utf-16 -buffersize 16 -blocking 0 vwait [namespace which -variable x] fconfigure $f -translation auto -encoding ascii -blocking 1 # here vwait [namespace which -variable x] close $f set x } [list -1 "" 42 15 "123456789012345" 25] |
Changes to tests/ioCmd.test.
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
test iocmd-8.6 {fconfigure command} -returnCodes error -body { fconfigure stdin -translation froboz } -result {bad value for -translation: must be one of auto, binary, cr, lf, crlf, or platform} test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -eofchar {} -encoding unicode fconfigure $f1 } -cleanup { catch {close $f1} } -result {-blocking 1 -buffering full -buffersize 4096 -encoding unicode -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ -eofchar {} -encoding unicode lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} } -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding unicode -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ -eofchar {} -encoding binary fconfigure $f1 |
| | | | |
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
test iocmd-8.6 {fconfigure command} -returnCodes error -body { fconfigure stdin -translation froboz } -result {bad value for -translation: must be one of auto, binary, cr, lf, crlf, or platform} test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 fconfigure $f1 } -cleanup { catch {close $f1} } -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ -eofchar {} -encoding utf-16 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} } -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ -eofchar {} -encoding binary fconfigure $f1 |
Changes to tests/source.test.
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# and use of the Control-Z character (\u001A) as a cross-platform # EOF character by [source]. Here we write out and the [source] a # file that contains the byte \x1A, although not the character \u001A in # the indicated encoding. set sourcefile [makeFile {} source.file] file delete $sourcefile set f [open $sourcefile w] fconfigure $f -encoding unicode puts $f "set symbol(square-root) \u221A; set x correct" close $f } -body { set x unset source -encoding unicode $sourcefile set x } -cleanup { removeFile source.file } -result correct test source-7.3 {source -encoding: syntax} -body { # Have to spell out the -encoding option source -e utf-8 no_file |
| | |
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# and use of the Control-Z character (\u001A) as a cross-platform # EOF character by [source]. Here we write out and the [source] a # file that contains the byte \x1A, although not the character \u001A in # the indicated encoding. set sourcefile [makeFile {} source.file] file delete $sourcefile set f [open $sourcefile w] fconfigure $f -encoding utf-16 puts $f "set symbol(square-root) \u221A; set x correct" close $f } -body { set x unset source -encoding utf-16 $sourcefile set x } -cleanup { removeFile source.file } -result correct test source-7.3 {source -encoding: syntax} -body { # Have to spell out the -encoding option source -e utf-8 no_file |