Tcl Source Code

Changes On Branch tip-547
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch tip-547 Excluding Merge-Ins

This is equivalent to a diff from 34ea059cbf to 25074a8c8b

2019-06-28
22:36
Implement TIP #547: New encodings: UTF-16, UCS-2 check-in: 2cef9a0691 user: jan.nijtmans tags: core-8-branch
2019-06-27
13:40
Fix [15d851e394]: Fix spelling in comment of tclCmdIL.c check-in: e138330fe5 user: jan.nijtmans tags: core-8-branch
13:08
merge 8.7 check-in: a1193f802a user: dgp tags: core-8-7-a3-rc
2019-06-26
20:08
Merge tip-547 check-in: 6f4e263391 user: jan.nijtmans tags: tip-548
20:06
Merge 8.7 Closed-Leaf check-in: 25074a8c8b user: jan.nijtmans tags: tip-547
08:34
Merge 8.7 check-in: 7723dac835 user: jan.nijtmans tags: trunk
08:32
Merge 8.6 check-in: 34ea059cbf user: jan.nijtmans tags: core-8-branch
08:27
UNEXEC win/tclWinFile.c check-in: eaf145bcd7 user: jan.nijtmans tags: core-8-6-branch
2019-06-25
22:32
Merge 8.6. Also fix mp_get_long_long for VC++ 6.0 (will be deprecated by libtommath) check-in: f4b325d680 user: jan.nijtmans tags: core-8-branch
2019-06-16
11:37
Merge 8.7 check-in: f9c29c1321 user: jan.nijtmans tags: tip-547

Changes to generic/tclEncoding.c.

230
231
232
233
234
235
236
237






238
239
240
241
242

243
244
245
246
247
248
249
230
231
232
233
234
235
236

237
238
239
240
241
242
243
244
245
246

247
248
249
250
251
252
253
254







-
+
+
+
+
+
+




-
+







			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr);
static int		TableToUtfProc(ClientData clientData, const char *src,
			    int srcLen, int flags, Tcl_EncodingState *statePtr,
			    char *dst, int dstLen, int *srcReadPtr,
			    int *dstWrotePtr, int *dstCharsPtr);
static size_t		unilen(const char *src);
static int		UniCharToUtfProc(ClientData clientData,
static int		Utf16ToUtfProc(ClientData clientData,
			    const char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr);
static int		UtfToUtf16Proc(ClientData clientData,
			    const char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr);
static int		UtfToUniCharProc(ClientData clientData,
static int		UtfToUcs2Proc(ClientData clientData,
			    const char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr);
static int		UtfToUtfProc(ClientData clientData,
			    const char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
560
561
562
563
564
565
566




567
568
569
570
571

572
573
574
575
576
577
578
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588







+
+
+
+





+







void
TclInitEncodingSubsystem(void)
{
    Tcl_EncodingType type;
    TableEncodingData *dataPtr;
    unsigned size;
    unsigned short i;
    union {
        char c;
        short s;
    } isLe;

    if (encodingsInitialized) {
	return;
    }

    isLe.s = 1;
    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings. Note that the UTF-8 to UTF-8
     * translation is not a no-op, because it will turn a stream of improperly
591
592
593
594
595
596
597




598
599
600












601
602







603

604





605
606
607
608
609
610
611
601
602
603
604
605
606
607
608
609
610
611



612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632

633
634
635
636
637
638
639
640
641
642
643
644
645
646







+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+


+
+
+
+
+
+
+
-
+

+
+
+
+
+







    type.toUtfProc	= UtfExtToUtfIntProc;
    type.fromUtfProc	= UtfIntToUtfExtProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUcs2Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "unicode";
    type.toUtfProc	= UniCharToUtfProc;
    type.fromUtfProc    = UtfToUniCharProc;
    type.encodingName   = "ucs-2le";
    type.clientData	= INT2PTR(1);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUtf16Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "utf-16le";
    type.clientData	= INT2PTR(1);;
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16";
    type.clientData	= NULL;
    type.clientData	= INT2PTR(isLe.c);;
    Tcl_CreateEncoding(&type);

#ifndef TCL_NO_DEPRECATED
    type.encodingName   = "unicode";
    Tcl_CreateEncoding(&type);
#endif

    /*
     * Need the iso8859-1 encoding in order to process binary data, so force
     * it to always be embedded. Note that this encoding *must* be a proper
     * table encoding or some of the escape encodings crash! Hence the ugly
     * code to duplicate the structure of a table encoding here.
     */
1275
1276
1277
1278
1279
1280
1281
1282

1283
1284
1285
1286
1287
1288
1289
1310
1311
1312
1313
1314
1315
1316

1317
1318
1319
1320
1321
1322
1323
1324







-
+








	result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen,
		flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
		dstCharsPtr);
	if (*dstCharsPtr <= maxChars) {
	    break;
	}
	dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
	dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
	flags = savedFlags;
	*statePtr = savedState;
    } while (1);
    if (!noTerminate) {
	/* ...and then append it */

	dst[*dstWrotePtr] = '\0';
2397
2398
2399
2400
2401
2402
2403
2404

2405
2406

2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419


2420
2421
2422
2423
2424
2425
2426
2432
2433
2434
2435
2436
2437
2438

2439
2440

2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452


2453
2454
2455
2456
2457
2458
2459
2460
2461







-
+

-
+











-
-
+
+







    *dstCharsPtr = numChars;
    return result;
}

/*
 *-------------------------------------------------------------------------
 *
 * UniCharToUtfProc --
 * Utf16ToUtfProc --
 *
 *	Convert from Unicode to UTF-8.
 *	Convert from UTF-16 to UTF-8.
 *
 * Results:
 *	Returns TCL_OK if conversion was successful.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UniCharToUtfProc(
    ClientData clientData,	/* Not used. */
Utf16ToUtfProc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
    const char *src,		/* Source string in Unicode. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
2464
2465
2466
2467
2468
2469
2470





2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494

2495
2496

2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509


2510
2511
2512
2513
2514
2515
2516
2517
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514


2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531

2532
2533

2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545


2546
2547

2548
2549
2550
2551
2552
2553
2554







+
+
+
+
+




-
-

















-
+

-
+











-
-
+
+
-








    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	if (clientData) {
	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
	} else {
	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
	}
	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.
	 */

	ch = *(unsigned short *)src;
	if (ch && ch < 0x80) {
	    *dst++ = (ch & 0xFF);
	} else {
	    dst += Tcl_UniCharToUtf(ch, dst);
	}
	src += sizeof(unsigned short);
    }

    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}

/*
 *-------------------------------------------------------------------------
 *
 * UtfToUniCharProc --
 * UtfToUtf16Proc --
 *
 *	Convert from UTF-8 to Unicode.
 *	Convert from UTF-8 to UTF-16.
 *
 * Results:
 *	Returns TCL_OK if conversion was successful.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUniCharProc(
    ClientData clientData,	/* TableEncodingData that specifies
UtfToUtf16Proc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
				 * encoding. */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
2567
2568
2569
2570
2571
2572
2573

2574















2575
2576
2577
2578
2579
2580
2581
2582
2583
2584









2585
2586
2587


2588
2589
2590
2591
2592
2593
2594






2595
2596
2597
2598
2599
2600
2601
2602

















































2603






































2604













2605
2606
2607
2608
2609
2610
2611

2612
2613
2614
2615
2616
2617
2618
2604
2605
2606
2607
2608
2609
2610
2611

2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627









2628
2629
2630
2631
2632
2633
2634
2635
2636
2637


2638
2639
2640






2641
2642
2643
2644
2645
2646








2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754

2755
2756
2757
2758
2759
2760
2761
2762







+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+

-
-
+
+

-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+






-
+







	src += TclUtfToUniChar(src, chPtr);

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
#ifdef WORDS_BIGENDIAN
#if TCL_UTF_MAX > 4
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (*chPtr >> 8);
	    } else {
		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    }
#else
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
#endif
	} else {
#if TCL_UTF_MAX > 4
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr >> 8);
	    *dst++ = (*chPtr & 0xFF);
	} else {
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	}
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr >> 8);
		*dst++ = (*chPtr & 0xFF);
	    } else {
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	    }
#else
	*dst++ = (*chPtr >> 8);
	*dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
	    *dst++ = (*chPtr & 0xFF);
#endif
#else
#if TCL_UTF_MAX > 4
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
	} else {
	}
    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	}
#else
	*dst++ = (*chPtr & 0xFF);
	*dst++ = (*chPtr >> 8);
}

/*
 *-------------------------------------------------------------------------
 *
 * UtfToUcs2Proc --
 *
 *	Convert from UTF-8 to UCS-2.
 *
 * Results:
 *	Returns TCL_OK if conversion was successful.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUcs2Proc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
				 * routine under control of flags argument. */
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in
				 * bytes. */
    int *srcReadPtr,		/* Filled with the number of bytes from the
				 * source string that were converted. This may
				 * be less than the original source length if
				 * there was a problem converting some source
				 * characters. */
    int *dstWrotePtr,		/* Filled with the number of bytes that were
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
    int result, numChars;
#if TCL_UTF_MAX <= 4
    int len;
#endif
    Tcl_UniChar ch = 0;

    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
    if ((flags & TCL_ENCODING_END) == 0) {
	srcClose -= TCL_UTF_MAX;
    }

    dstStart = dst;
    dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);

    result = TCL_OK;
    for (numChars = 0; src < srcEnd; numChars++) {
	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
	    /*
	     * If there is more string to follow, this will ensure that the
	     * last UTF-8 character in the source buffer hasn't been cut off.
	     */

	    result = TCL_CONVERT_MULTIBYTE;
	    break;
	}
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
#if TCL_UTF_MAX <= 4
	src += (len = TclUtfToUniChar(src, &ch));
	if ((ch >= 0xD800) && (len < 3)) {
	    src += TclUtfToUniChar(src, &ch);
	    ch = 0xFFFD;
	}
#else
	src += TclUtfToUniChar(src, &ch);
	if (ch > 0xFFFF) {
	    ch = 0xFFFD;
	}
#endif

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
	    *dst++ = (ch & 0xFF);
	    *dst++ = (ch >> 8);
	} else {
	    *dst++ = (ch >> 8);
	    *dst++ = (ch & 0xFF);
	}
    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}


/*
 *-------------------------------------------------------------------------
 *
 * TableToUtfProc --
 *
 *	Convert from the encoding specified by the TableEncodingData into
 *	UTF-8.

Changes to tests/binary.test.

2908
2909
2910
2911
2912
2913
2914
2915

2916
2917
2918
2919
2920
2921
2922
2908
2909
2910
2911
2912
2913
2914

2915
2916
2917
2918
2919
2920
2921
2922







-
+







    apply {{a b} {
	set one [binary format H* $a]
	return $one[binary format H* $b]
    }} ab cd
} [binary format H* abcd]

test binary-78.1 {unicode (out of BMP) to byte-array conversion, bug-[bd94500678]} -body {
    # just test for BO-segfault (high surrogate w/o advance source pointer for out of BMP char if TCL_UTF_MAX <= 4):
    # just test for BO-segfault (high surrogate w/o advance source pointer for out of BMP char if TCL_UTF_MAX == 3):
    binary encode hex \U0001f415
    binary scan \U0001f415 a* v; set v
    set str {}
} -result {}

# ----------------------------------------------------------------------
# cleanup

Changes to tests/chanio.test.

884
885
886
887
888
889
890
891

892
893
894
895
896
897
898
884
885
886
887
888
889
890

891
892
893
894
895
896
897
898







-
+







} -result {bbbbbbbbbbbbbbb 15 123456789abcdef 1 4 abcd 0 3 efg}
test chan-io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} -setup {
    set x ""
} -constraints {stdio testchannel openpipe fileevent} -body {
    # Tcl_ExternalToUtf()
    set f [openpipe w+ $path(cat)]
    chan configure $f -translation {auto lf} -buffering none
    chan configure $f -encoding unicode
    chan configure $f -encoding utf-16
    chan puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r"
    chan configure $f -buffersize 16
    chan gets $f
    chan configure $f -blocking 0
    lappend x [chan gets $f line] $line [testchannel queuedcr $f]
    chan configure $f -blocking 1
    chan puts -nonewline $f "\nabcd\refg"
1125
1126
1127
1128
1129
1130
1131
1132

1133
1134
1135
1136
1137
1138
1139
1125
1126
1127
1128
1129
1130
1131

1132
1133
1134
1135
1136
1137
1138
1139







-
+







    # not (bufPtr->nextPtr == NULL)
    set f [openpipe w+ $path(cat)]
    chan configure $f -translation lf -encoding ascii -buffering none
    chan puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz"
    chan event $f read [namespace code {
	lappend x [chan gets $f line] $line [testchannel inputbuffered $f]
    }]
    chan configure $f -encoding unicode -buffersize 16 -blocking 0
    chan configure $f -encoding utf-16 -buffersize 16 -blocking 0
    vwait [namespace which -variable x]
    chan configure $f -translation auto -encoding ascii -blocking 1
    # here
    vwait [namespace which -variable x]
    return $x
} -cleanup {
    chan close $f

Changes to tests/encoding.test.

318
319
320
321
322
323
324
325
326


327
328
329
330










331
332
333
334
335


336



337
338
339
340
341
342
343
318
319
320
321
322
323
324


325
326
327
328


329
330
331
332
333
334
335
336
337
338
339
340
341


342
343
344
345
346
347
348
349
350
351
352
353
354







-
-
+
+


-
-
+
+
+
+
+
+
+
+
+
+



-
-
+
+

+
+
+







} 00
test encoding-15.3 {UtfToUtfProc null character input} teststringbytes {
    set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]]
    binary scan [teststringbytes $y] H* z
    set z
} c080

test encoding-16.1 {UnicodeToUtfProc} -body {
    set val [encoding convertfrom unicode NN]
test encoding-16.1 {Utf16ToUtfProc} -body {
    set val [encoding convertfrom utf-16 NN]
    list $val [format %x [scan $val %c]]
} -result "\u4e4e 4e4e"
test encoding-16.2 {UnicodeToUtfProc} -body {
    set val [encoding convertfrom unicode "\xd8\xd8\xdc\xdc"]
test encoding-16.2 {Utf16ToUtfProc} -body {
    set val [encoding convertfrom utf-16 "\xd8\xd8\xdc\xdc"]
    list $val [format %x [scan $val %c]]
} -result "\U460dc 460dc"
test encoding-16.3 {Ucs2ToUtfProc} -body {
    set val [encoding convertfrom ucs-2 NN]
    list $val [format %x [scan $val %c]]
} -result "\u4e4e 4e4e"
test encoding-16.4 {Ucs2ToUtfProc} -body {
    set val [encoding convertfrom ucs-2 "\xd8\xd8\xdc\xdc"]
    list $val [format %x [scan $val %c]]
} -result "\U460dc 460dc"

test encoding-17.1 {UtfToUnicodeProc} -body {
    encoding convertto unicode "\U460dc"
test encoding-17.1 {UtfToUtf16Proc} -body {
    encoding convertto utf-16 "\U460dc"
} -result "\xd8\xd8\xdc\xdc"
test encoding-17.2 {UtfToUcs2Proc} -body {
    encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460dc"]
} -result "\ufffd"

test encoding-18.1 {TableToUtfProc} {
} {}

test encoding-19.1 {TableFromUtfProc} {
} {}

Changes to tests/io.test.

914
915
916
917
918
919
920
921

922
923
924
925
926
927
928
914
915
916
917
918
919
920

921
922
923
924
925
926
927
928







-
+







    set x
} [list "bbbbbbbbbbbbbbb" 15 "123456789abcdef" 1 4 "abcd" 0 3 "efg"]
test io-6.45 {Tcl_GetsObj: input saw cr, skip right number of bytes} {stdio testchannel openpipe fileevent} {
    # Tcl_ExternalToUtf()

    set f [open "|[list [interpreter] $path(cat)]" w+]
    fconfigure $f -translation {auto lf} -buffering none
    fconfigure $f -encoding unicode
    fconfigure $f -encoding utf-16
    puts -nonewline $f "bbbbbbbbbbbbbbb\n123456789abcdef\r"
    fconfigure $f -buffersize 16
    gets $f
    fconfigure $f -blocking 0
    set x [list [gets $f line] $line [testchannel queuedcr $f]]
    fconfigure $f -blocking 1
    puts -nonewline $f "\nabcd\refg"
1158
1159
1160
1161
1162
1163
1164
1165

1166
1167
1168
1169
1170
1171
1172
1158
1159
1160
1161
1162
1163
1164

1165
1166
1167
1168
1169
1170
1171
1172







-
+







    puts -nonewline $f "123456789012345\r\nbcdefghijklmnopqrstuvwxyz"
    variable x {}
    fileevent $f read [namespace code "ready $f"]
    proc ready {f} {
	variable x
	lappend x [gets $f line] $line [testchannel inputbuffered $f]
    }
    fconfigure $f -encoding unicode -buffersize 16 -blocking 0
    fconfigure $f -encoding utf-16 -buffersize 16 -blocking 0
    vwait [namespace which -variable x]
    fconfigure $f -translation auto -encoding ascii -blocking 1
    # here
    vwait [namespace which -variable x]
    close $f
    set x
} [list -1 "" 42 15 "123456789012345" 25]

Changes to tests/ioCmd.test.

237
238
239
240
241
242
243
244

245
246
247
248

249
250
251
252
253
254
255

256
257
258
259
260

261
262
263
264
265
266
267
237
238
239
240
241
242
243

244
245
246
247

248
249
250
251
252
253
254

255
256
257
258
259

260
261
262
263
264
265
266
267







-
+



-
+






-
+




-
+







test iocmd-8.6 {fconfigure command} -returnCodes error -body {
    fconfigure stdin -translation froboz
} -result {bad value for -translation: must be one of auto, binary, cr, lf, crlf, or platform}
test iocmd-8.7 {fconfigure command} -setup {
    file delete $path(test1)
} -body {
    set f1 [open $path(test1) w]
    fconfigure $f1 -translation lf -eofchar {} -encoding unicode
    fconfigure $f1 -translation lf -eofchar {} -encoding utf-16
    fconfigure $f1
} -cleanup {
    catch {close $f1}
} -result {-blocking 1 -buffering full -buffersize 4096 -encoding unicode -eofchar {} -translation lf}
} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -translation lf}
test iocmd-8.8 {fconfigure command} -setup {
    file delete $path(test1)
    set x {}
} -body {
    set f1 [open $path(test1) w]
    fconfigure $f1 -translation lf -buffering line -buffersize 3030 \
		-eofchar {} -encoding unicode
		-eofchar {} -encoding utf-16
    lappend x [fconfigure $f1 -buffering]
    lappend x [fconfigure $f1]
} -cleanup {
    catch {close $f1}
} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding unicode -eofchar {} -translation lf}}
} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -translation lf}}
test iocmd-8.9 {fconfigure command} -setup {
    file delete $path(test1)
} -body {
    set f1 [open $path(test1) w]
    fconfigure $f1 -translation binary -buffering none -buffersize 4040 \
		-eofchar {} -encoding binary
    fconfigure $f1

Changes to tests/source.test.

236
237
238
239
240
241
242
243

244
245
246
247
248

249
250
251
252
253
254
255
236
237
238
239
240
241
242

243
244
245
246
247

248
249
250
251
252
253
254
255







-
+




-
+







    # and use of the Control-Z character (\u001A) as a cross-platform
    # EOF character by [source].  Here we write out and the [source] a
    # file that contains the byte \x1A, although not the character \u001A in
    # the indicated encoding.
    set sourcefile [makeFile {} source.file]
    file delete $sourcefile
    set f [open $sourcefile w]
    fconfigure $f -encoding unicode
    fconfigure $f -encoding utf-16
    puts $f "set symbol(square-root) \u221A; set x correct"
    close $f
} -body {
    set x unset
    source -encoding unicode $sourcefile
    source -encoding utf-16 $sourcefile
    set x
} -cleanup {
    removeFile source.file
} -result correct
test source-7.3 {source -encoding: syntax} -body {
    # Have to spell out the -encoding option
    source -e utf-8 no_file