Tcl Source Code

Check-in [0fa21022ea]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:merge trunk
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | dgp-refactor
Files: files | file ages | folders
SHA3-256: 0fa21022eafed4ece359db700f0dbb07f3cebe44791228d812d5ad084f8305b3
User & Date: dgp 2019-02-23 13:09:53.352
Context
2019-02-25
16:04
merge trunk check-in: 7fd2df44ff user: dgp tags: dgp-refactor
2019-02-23
13:09
merge trunk check-in: 0fa21022ea user: dgp tags: dgp-refactor
2019-02-22
18:01
merge 8.7 check-in: adfe04dbbd user: dgp tags: trunk
2019-02-14
19:46
merge trunk check-in: f87070ace5 user: dgp tags: dgp-refactor
Changes
Unified Diff Ignore Whitespace Patch
Changes to doc/Utf.3.
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in starting at \fIbuf\fR.  The return value is the number of bytes stored
in \fIbuf\fR. If ch is an upper surrogate (range U+D800 - U+DBFF), then
the return value will be 0 and nothing will be stored. If you still
want to produce UTF-8 output for it (even though knowing it's an illegal
code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
number of bytes read from \fIsrc\fR.  The caller must ensure that the







|







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in starting at \fIbuf\fR.  The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
the return value will be 0 and nothing will be stored. If you still
want to produce UTF-8 output for it (even though knowing it's an illegal
code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
number of bytes read from \fIsrc\fR.  The caller must ensure that the
Changes to generic/tclBinary.c.
53
54
55
56
57
58
59


60
61
62

63
64
65
66
67
68
69

/*
 * Prototypes for local procedures defined in this file:
 */

static void		DupByteArrayInternalRep(Tcl_Obj *srcPtr,
			    Tcl_Obj *copyPtr);


static int		FormatNumber(Tcl_Interp *interp, int type,
			    Tcl_Obj *src, unsigned char **cursorPtr);
static void		FreeByteArrayInternalRep(Tcl_Obj *objPtr);

static int		GetFormatSpec(const char **formatPtr, char *cmdPtr,
			    size_t *countPtr, int *flagsPtr);
static Tcl_Obj *	ScanNumber(unsigned char *buffer, int type,
			    int flags, Tcl_HashTable **numberCachePtr);
static int		SetByteArrayFromAny(Tcl_Interp *interp,
			    Tcl_Obj *objPtr);
static void		UpdateStringOfByteArray(Tcl_Obj *listPtr);







>
>



>







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

/*
 * Prototypes for local procedures defined in this file:
 */

static void		DupByteArrayInternalRep(Tcl_Obj *srcPtr,
			    Tcl_Obj *copyPtr);
static void		DupProperByteArrayInternalRep(Tcl_Obj *srcPtr,
			    Tcl_Obj *copyPtr);
static int		FormatNumber(Tcl_Interp *interp, int type,
			    Tcl_Obj *src, unsigned char **cursorPtr);
static void		FreeByteArrayInternalRep(Tcl_Obj *objPtr);
static void		FreeProperByteArrayInternalRep(Tcl_Obj *objPtr);
static int		GetFormatSpec(const char **formatPtr, char *cmdPtr,
			    size_t *countPtr, int *flagsPtr);
static Tcl_Obj *	ScanNumber(unsigned char *buffer, int type,
			    int flags, Tcl_HashTable **numberCachePtr);
static int		SetByteArrayFromAny(Tcl_Interp *interp,
			    Tcl_Obj *objPtr);
static void		UpdateStringOfByteArray(Tcl_Obj *listPtr);
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
 * so that Tcl 9 will no longer have any trace of it.  Prescribing a
 * migration path will be the key element of that work.  The internal
 * changes now in place are the limit of what can be done short of
 * interface repair.  They provide a great expansion of the histories
 * over which bytearray values can be useful in the meanwhile.
 */

const Tcl_ObjType tclPureByteArrayType = {
    "bytearray",
    FreeByteArrayInternalRep,
    DupByteArrayInternalRep,
    UpdateStringOfByteArray,
    NULL
};

const Tcl_ObjType tclByteArrayType = {
    "bytearray",
    FreeByteArrayInternalRep,







|

|
|







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
 * so that Tcl 9 will no longer have any trace of it.  Prescribing a
 * migration path will be the key element of that work.  The internal
 * changes now in place are the limit of what can be done short of
 * interface repair.  They provide a great expansion of the histories
 * over which bytearray values can be useful in the meanwhile.
 */

static const Tcl_ObjType properByteArrayType = {
    "bytearray",
    FreeProperByteArrayInternalRep,
    DupProperByteArrayInternalRep,
    UpdateStringOfByteArray,
    NULL
};

const Tcl_ObjType tclByteArrayType = {
    "bytearray",
    FreeByteArrayInternalRep,
279
280
281
282
283
284
285







286
287
288
289
290
291
292
} ByteArray;

#define BYTEARRAY_SIZE(len) \
		((TclOffset(ByteArray, bytes) + (len)))
#define GET_BYTEARRAY(irPtr) ((ByteArray *) (irPtr)->twoPtrValue.ptr1)
#define SET_BYTEARRAY(irPtr, baPtr) \
		(irPtr)->twoPtrValue.ptr1 = (baPtr)








/*
 *----------------------------------------------------------------------
 *
 * Tcl_NewByteArrayObj --
 *
 *	This procedure is creates a new ByteArray object and initializes it







>
>
>
>
>
>
>







282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
} ByteArray;

#define BYTEARRAY_SIZE(len) \
		((TclOffset(ByteArray, bytes) + (len)))
#define GET_BYTEARRAY(irPtr) ((ByteArray *) (irPtr)->twoPtrValue.ptr1)
#define SET_BYTEARRAY(irPtr, baPtr) \
		(irPtr)->twoPtrValue.ptr1 = (baPtr)

int
TclIsPureByteArray(
    Tcl_Obj * objPtr)
{
    return objPtr->typePtr == &properByteArrayType;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_NewByteArrayObj --
 *
 *	This procedure is creates a new ByteArray object and initializes it
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
    byteArrayPtr->allocated = length;

    if ((bytes != NULL) && (length > 0)) {
	memcpy(byteArrayPtr->bytes, bytes, length);
    }
    SET_BYTEARRAY(&ir, byteArrayPtr);

    Tcl_StoreIntRep(objPtr, &tclPureByteArrayType, &ir);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetByteArrayFromObj --
 *







|







416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
    byteArrayPtr->allocated = length;

    if ((bytes != NULL) && (length > 0)) {
	memcpy(byteArrayPtr->bytes, bytes, length);
    }
    SET_BYTEARRAY(&ir, byteArrayPtr);

    Tcl_StoreIntRep(objPtr, &properByteArrayType, &ir);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetByteArrayFromObj --
 *
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
unsigned char *
Tcl_GetByteArrayFromObj(
    Tcl_Obj *objPtr,		/* The ByteArray object. */
    int *lengthPtr)		/* If non-NULL, filled with length of the
				 * array of bytes in the ByteArray object. */
{
    ByteArray *baPtr;
    const Tcl_ObjIntRep *irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);

    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }
    baPtr = GET_BYTEARRAY(irPtr);








|





|







444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
unsigned char *
Tcl_GetByteArrayFromObj(
    Tcl_Obj *objPtr,		/* The ByteArray object. */
    int *lengthPtr)		/* If non-NULL, filled with length of the
				 * array of bytes in the ByteArray object. */
{
    ByteArray *baPtr;
    const Tcl_ObjIntRep *irPtr = TclFetchIntRep(objPtr, &properByteArrayType);

    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }
    baPtr = GET_BYTEARRAY(irPtr);

488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
    ByteArray *byteArrayPtr;
    Tcl_ObjIntRep *irPtr;

    if (Tcl_IsShared(objPtr)) {
	Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength");
    }

    irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }

    byteArrayPtr = GET_BYTEARRAY(irPtr);







|




|







498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
    ByteArray *byteArrayPtr;
    Tcl_ObjIntRep *irPtr;

    if (Tcl_IsShared(objPtr)) {
	Tcl_Panic("%s called with shared object", "Tcl_SetByteArrayLength");
    }

    irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }

    byteArrayPtr = GET_BYTEARRAY(irPtr);
540
541
542
543
544
545
546
547


548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
    int improper = 0;
    const char *src, *srcEnd;
    unsigned char *dst;
    Tcl_UniChar ch = 0;
    ByteArray *byteArrayPtr;
    Tcl_ObjIntRep ir;

    if ((objPtr->typePtr == &tclPureByteArrayType)


	    || (objPtr->typePtr == &tclByteArrayType)) {
	return TCL_OK;
    }

    src = TclGetStringFromObj(objPtr, &length);
    srcEnd = src + length;

    byteArrayPtr = Tcl_Alloc(BYTEARRAY_SIZE(length));
    for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
	src += TclUtfToUniChar(src, &ch);
	improper = improper || (ch > 255);
	*dst++ = UCHAR(ch);
    }

    byteArrayPtr->used = dst - byteArrayPtr->bytes;
    byteArrayPtr->allocated = length;

    SET_BYTEARRAY(&ir, byteArrayPtr);
    Tcl_StoreIntRep(objPtr,
	    improper ? &tclByteArrayType : &tclPureByteArrayType, &ir);
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * FreeByteArrayInternalRep --







|
>
>
|


















|







550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
    int improper = 0;
    const char *src, *srcEnd;
    unsigned char *dst;
    Tcl_UniChar ch = 0;
    ByteArray *byteArrayPtr;
    Tcl_ObjIntRep ir;

    if (objPtr->typePtr == &properByteArrayType) {
	return TCL_OK;
    }
    if (objPtr->typePtr == &tclByteArrayType) {
	return TCL_OK;
    }

    src = TclGetStringFromObj(objPtr, &length);
    srcEnd = src + length;

    byteArrayPtr = Tcl_Alloc(BYTEARRAY_SIZE(length));
    for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
	src += TclUtfToUniChar(src, &ch);
	improper = improper || (ch > 255);
	*dst++ = UCHAR(ch);
    }

    byteArrayPtr->used = dst - byteArrayPtr->bytes;
    byteArrayPtr->allocated = length;

    SET_BYTEARRAY(&ir, byteArrayPtr);
    Tcl_StoreIntRep(objPtr,
	    improper ? &tclByteArrayType : &properByteArrayType, &ir);
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * FreeByteArrayInternalRep --
585
586
587
588
589
590
591
592







593
594
595
596
597
598
599
 *----------------------------------------------------------------------
 */

static void
FreeByteArrayInternalRep(
    Tcl_Obj *objPtr)		/* Object with internal rep to free. */
{
    Tcl_Free(GET_BYTEARRAY(&(objPtr->internalRep)));







}

/*
 *----------------------------------------------------------------------
 *
 * DupByteArrayInternalRep --
 *







|
>
>
>
>
>
>
>







597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
 *----------------------------------------------------------------------
 */

static void
FreeByteArrayInternalRep(
    Tcl_Obj *objPtr)		/* Object with internal rep to free. */
{
    Tcl_Free(GET_BYTEARRAY(TclFetchIntRep(objPtr, &tclByteArrayType)));
}

static void
FreeProperByteArrayInternalRep(
    Tcl_Obj *objPtr)		/* Object with internal rep to free. */
{
    Tcl_Free(GET_BYTEARRAY(TclFetchIntRep(objPtr, &properByteArrayType)));
}

/*
 *----------------------------------------------------------------------
 *
 * DupByteArrayInternalRep --
 *
614
615
616
617
618
619
620
621





















622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
    Tcl_Obj *srcPtr,		/* Object with internal rep to copy. */
    Tcl_Obj *copyPtr)		/* Object with internal rep to set. */
{
    size_t length;
    ByteArray *srcArrayPtr, *copyArrayPtr;
    Tcl_ObjIntRep ir;

    srcArrayPtr = GET_BYTEARRAY(&(srcPtr->internalRep));





















    length = srcArrayPtr->used;

    copyArrayPtr = Tcl_Alloc(BYTEARRAY_SIZE(length));
    copyArrayPtr->used = length;
    copyArrayPtr->allocated = length;
    memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, length);

    SET_BYTEARRAY(&ir, copyArrayPtr);
    Tcl_StoreIntRep(copyPtr, srcPtr->typePtr, &ir);
}

/*
 *----------------------------------------------------------------------
 *
 * UpdateStringOfByteArray --
 *







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|


|







633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
    Tcl_Obj *srcPtr,		/* Object with internal rep to copy. */
    Tcl_Obj *copyPtr)		/* Object with internal rep to set. */
{
    size_t length;
    ByteArray *srcArrayPtr, *copyArrayPtr;
    Tcl_ObjIntRep ir;

    srcArrayPtr = GET_BYTEARRAY(TclFetchIntRep(srcPtr, &tclByteArrayType));
    length = srcArrayPtr->used;

    copyArrayPtr = ckalloc(BYTEARRAY_SIZE(length));
    copyArrayPtr->used = length;
    copyArrayPtr->allocated = length;
    memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, (size_t) length);

    SET_BYTEARRAY(&ir, copyArrayPtr);
    Tcl_StoreIntRep(copyPtr, &tclByteArrayType, &ir);
}

static void
DupProperByteArrayInternalRep(
    Tcl_Obj *srcPtr,		/* Object with internal rep to copy. */
    Tcl_Obj *copyPtr)		/* Object with internal rep to set. */
{
    unsigned int length;
    ByteArray *srcArrayPtr, *copyArrayPtr;
    Tcl_ObjIntRep ir;

    srcArrayPtr = GET_BYTEARRAY(TclFetchIntRep(srcPtr, &properByteArrayType));
    length = srcArrayPtr->used;

    copyArrayPtr = Tcl_Alloc(BYTEARRAY_SIZE(length));
    copyArrayPtr->used = length;
    copyArrayPtr->allocated = length;
    memcpy(copyArrayPtr->bytes, srcArrayPtr->bytes, (size_t) length);

    SET_BYTEARRAY(&ir, copyArrayPtr);
    Tcl_StoreIntRep(copyPtr, &properByteArrayType, &ir);
}

/*
 *----------------------------------------------------------------------
 *
 * UpdateStringOfByteArray --
 *
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
 */

static void
UpdateStringOfByteArray(
    Tcl_Obj *objPtr)		/* ByteArray object whose string rep to
				 * update. */
{
    const Tcl_ObjIntRep *irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
    ByteArray *byteArrayPtr = GET_BYTEARRAY(irPtr);
    unsigned char *src = byteArrayPtr->bytes;
    size_t i, length = byteArrayPtr->used;
    size_t size = length;

    /*
     * How much space will string rep need?







|







688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
 */

static void
UpdateStringOfByteArray(
    Tcl_Obj *objPtr)		/* ByteArray object whose string rep to
				 * update. */
{
    const Tcl_ObjIntRep *irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
    ByteArray *byteArrayPtr = GET_BYTEARRAY(irPtr);
    unsigned char *src = byteArrayPtr->bytes;
    size_t i, length = byteArrayPtr->used;
    size_t size = length;

    /*
     * How much space will string rep need?
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
		"TclAppendBytesToByteArray");
    }
    if (len == 0) {
	/* Append zero bytes is a no-op. */
	return;
    }

    irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &tclPureByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }
    byteArrayPtr = GET_BYTEARRAY(irPtr);








|




|







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
		"TclAppendBytesToByteArray");
    }
    if (len == 0) {
	/* Append zero bytes is a no-op. */
	return;
    }

    irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
    if (irPtr == NULL) {
	irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	if (irPtr == NULL) {
	    SetByteArrayFromAny(NULL, objPtr);
	    irPtr = TclFetchIntRep(objPtr, &properByteArrayType);
	    if (irPtr == NULL) {
		irPtr = TclFetchIntRep(objPtr, &tclByteArrayType);
	    }
	}
    }
    byteArrayPtr = GET_BYTEARRAY(irPtr);

Changes to generic/tclInt.h.
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
/*
 * Variables denoting the Tcl object types defined in the core.
 */

MODULE_SCOPE const Tcl_ObjType tclBignumType;
MODULE_SCOPE const Tcl_ObjType tclBooleanType;
MODULE_SCOPE const Tcl_ObjType tclByteArrayType;
MODULE_SCOPE const Tcl_ObjType tclPureByteArrayType;
MODULE_SCOPE const Tcl_ObjType tclByteCodeType;
MODULE_SCOPE const Tcl_ObjType tclDoubleType;
MODULE_SCOPE const Tcl_ObjType tclIntType;
MODULE_SCOPE const Tcl_ObjType tclListType;
MODULE_SCOPE const Tcl_ObjType tclDictType;
MODULE_SCOPE const Tcl_ObjType tclProcBodyType;
MODULE_SCOPE const Tcl_ObjType tclStringType;







<







2717
2718
2719
2720
2721
2722
2723

2724
2725
2726
2727
2728
2729
2730
/*
 * Variables denoting the Tcl object types defined in the core.
 */

MODULE_SCOPE const Tcl_ObjType tclBignumType;
MODULE_SCOPE const Tcl_ObjType tclBooleanType;
MODULE_SCOPE const Tcl_ObjType tclByteArrayType;

MODULE_SCOPE const Tcl_ObjType tclByteCodeType;
MODULE_SCOPE const Tcl_ObjType tclDoubleType;
MODULE_SCOPE const Tcl_ObjType tclIntType;
MODULE_SCOPE const Tcl_ObjType tclListType;
MODULE_SCOPE const Tcl_ObjType tclDictType;
MODULE_SCOPE const Tcl_ObjType tclProcBodyType;
MODULE_SCOPE const Tcl_ObjType tclStringType;
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
 * but we don't do that at the moment since this is purely about efficiency.
 * The ANSI C "prototype" for this macro is:
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

#define TclIsPureByteArray(objPtr) \
	((objPtr)->typePtr==&tclPureByteArrayType)
#define TclIsPureDict(objPtr) \
	(((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclDictType))
#define TclFetchIntRep(objPtr, type) \
	(((objPtr)->typePtr == type) ? &((objPtr)->internalRep) : NULL)


/*







|
<







4672
4673
4674
4675
4676
4677
4678
4679

4680
4681
4682
4683
4684
4685
4686
 * but we don't do that at the moment since this is purely about efficiency.
 * The ANSI C "prototype" for this macro is:
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);

#define TclIsPureDict(objPtr) \
	(((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclDictType))
#define TclFetchIntRep(objPtr, type) \
	(((objPtr)->typePtr == type) ? &((objPtr)->internalRep) : NULL)


/*
Changes to generic/tclParse.c.
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430

  done:
    if (readPtr != NULL) {
	*readPtr = count;
    }
    count = Tcl_UniCharToUtf(result, dst);
    if (!count) {
	/* Special case for handling upper surrogates. */
	count = Tcl_UniCharToUtf(-1, dst);
    }
    return count;
}

/*
 *----------------------------------------------------------------------







|







1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430

  done:
    if (readPtr != NULL) {
	*readPtr = count;
    }
    count = Tcl_UniCharToUtf(result, dst);
    if (!count) {
	/* Special case for handling high surrogates. */
	count = Tcl_UniCharToUtf(-1, dst);
    }
    return count;
}

/*
 *----------------------------------------------------------------------
Changes to generic/tclStringObj.c.
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
	    int code, length;

	    if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
		goto error;
	    }
	    length = Tcl_UniCharToUtf(code, buf);
	    if (!length) {
		/* Special case for handling upper surrogates. */
		length = Tcl_UniCharToUtf(-1, buf);
	    }
	    segment = Tcl_NewStringObj(buf, length);
	    Tcl_IncrRefCount(segment);
	    allocSegment = 1;
	    break;
	}







|







1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
	    int code, length;

	    if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) {
		goto error;
	    }
	    length = Tcl_UniCharToUtf(code, buf);
	    if (!length) {
		/* Special case for handling high surrogates. */
		length = Tcl_UniCharToUtf(-1, buf);
	    }
	    segment = Tcl_NewStringObj(buf, length);
	    Tcl_IncrRefCount(segment);
	    allocSegment = 1;
	    break;
	}
Changes to generic/tclStubInit.c.
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (wchar_t *)string + len;
    for (w = (wchar_t *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling upper surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	w++;
    }
    if (!blen) {
	/* Special case for handling upper surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);







|







|







207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (wchar_t *)string + len;
    for (w = (wchar_t *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	w++;
    }
    if (!blen) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
Changes to generic/tclUtf.c.
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
    string = Tcl_DStringValue(dsPtr) + oldLength;

    p = string;
    wEnd = uniStr + uniLength;
    for (w = uniStr; w < wEnd; ) {
	if (!len && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling upper surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	len = Tcl_UniCharToUtf(*w, p);
	p += len;
	w++;
    }
    if (!len) {
	/* Special case for handling upper surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}








|







|







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
    string = Tcl_DStringValue(dsPtr) + oldLength;

    p = string;
    wEnd = uniStr + uniLength;
    for (w = uniStr; w < wEnd; ) {
	if (!len && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	len = Tcl_UniCharToUtf(*w, p);
	p += len;
	w++;
    }
    if (!len) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}

767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX <= 4
     if (!len) {
	/* If last Tcl_UniChar was an upper surrogate, combine with lower surrogate */
	(void)TclUtfToUniChar(src, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}








|







767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX <= 4
     if (!len) {
	/* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
	(void)TclUtfToUniChar(src, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}

Changes to tests/utf.test.
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
} 1
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
    expr {[format %c -1] eq [testbytestring "\xef\xbf\xbd"]}
} 1
test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body {
    expr {"\U014e4e" eq [testbytestring "\xf0\x94\xb9\x8e"]}
} -result 1
test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring {
    expr {"\ud842" eq [testbytestring "\xed\xa1\x82"]}
} 1
test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring {
    expr {"\udc42" eq [testbytestring "\xed\xb1\x82"]}
} 1
test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, upper surrogate} testbytestring {
    expr {[format %c 0xd842] eq [testbytestring "\xed\xa1\x82"]}
} 1
test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, lower surrogate} testbytestring {
    expr {[format %c 0xdc42] eq [testbytestring "\xed\xb1\x82"]}
} 1

test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    string length "abc"
} {3}
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {







|


|


|


|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
} 1
test utf-1.6 {Tcl_UniCharToUtf: negative Tcl_UniChar} testbytestring {
    expr {[format %c -1] eq [testbytestring "\xef\xbf\xbd"]}
} 1
test utf-1.7 {Tcl_UniCharToUtf: 4 byte sequences} -constraints testbytestring -body {
    expr {"\U014e4e" eq [testbytestring "\xf0\x94\xb9\x8e"]}
} -result 1
test utf-1.8 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
    expr {"\ud842" eq [testbytestring "\xed\xa1\x82"]}
} 1
test utf-1.9 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring {
    expr {"\udc42" eq [testbytestring "\xed\xb1\x82"]}
} 1
test utf-1.10 {Tcl_UniCharToUtf: 3 byte sequence, high surrogate} testbytestring {
    expr {[format %c 0xd842] eq [testbytestring "\xed\xa1\x82"]}
} 1
test utf-1.11 {Tcl_UniCharToUtf: 3 byte sequence, low surrogate} testbytestring {
    expr {[format %c 0xdc42] eq [testbytestring "\xed\xb1\x82"]}
} 1

test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
    string length "abc"
} {3}
test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} testbytestring {
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
} "\u4e4e"
test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
    string index abcd 2
} {c}
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
    string index \u4e4e\u25a\xff\u543 2
} "\uff"
test utf-8.5 {Tcl_UniCharAtIndex: upper surrogate} {
    string index \ud842 0
} "\ud842"
test utf-8.5 {Tcl_UniCharAtIndex: lower surrogate} {
    string index \udc42 0
} "\udc42"

test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
    string range abcd 0 2
} {abc}
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {







|


|







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
} "\u4e4e"
test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
    string index abcd 2
} {c}
test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
    string index \u4e4e\u25a\xff\u543 2
} "\uff"
test utf-8.5 {Tcl_UniCharAtIndex: high surrogate} {
    string index \ud842 0
} "\ud842"
test utf-8.5 {Tcl_UniCharAtIndex: low surrogate} {
    string index \udc42 0
} "\udc42"

test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
    string range abcd 0 2
} {abc}
test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
Changes to win/tclWin32Dll.c.
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (TCHAR *)string + len;
    for (w = (TCHAR *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling upper surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	w++;
    }
    if (!blen) {
	/* Special case for handling upper surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);







|







|







563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (TCHAR *)string + len;
    for (w = (TCHAR *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	w++;
    }
    if (!blen) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
Changes to win/tclWinPipe.c.
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
	}

	quote &= ~(CL_ESCAPE|CL_QUOTE); /* reset escape flags */
	bspos = NULL;
	if (arg[0] == '\0') {
	    quote = CL_QUOTE;
	} else {
	    int count;
	    Tcl_UniChar ch;
	    for (start = arg;
		*start != '\0' &&
		    (quote & (CL_ESCAPE|CL_QUOTE)) != (CL_ESCAPE|CL_QUOTE);
		start += count
	    ) {
		count = Tcl_UtfToUniChar(start, &ch);
		if (count > 1) continue;
		if (Tcl_UniCharIsSpace(ch)) {
		    quote |= CL_QUOTE; /* quote only */
		    if (bspos) { /* if backslash found - escape & quote */
			quote |= CL_ESCAPE;
			break;
		    }
		    continue;
		}







<
<



|

<
|
|







1550
1551
1552
1553
1554
1555
1556


1557
1558
1559
1560
1561

1562
1563
1564
1565
1566
1567
1568
1569
1570
	}

	quote &= ~(CL_ESCAPE|CL_QUOTE); /* reset escape flags */
	bspos = NULL;
	if (arg[0] == '\0') {
	    quote = CL_QUOTE;
	} else {


	    for (start = arg;
		*start != '\0' &&
		    (quote & (CL_ESCAPE|CL_QUOTE)) != (CL_ESCAPE|CL_QUOTE);
		start++
	    ) {

		if (*start & 0x80) continue;
		if (TclIsSpaceProc(*start)) {
		    quote |= CL_QUOTE; /* quote only */
		    if (bspos) { /* if backslash found - escape & quote */
			quote |= CL_ESCAPE;
			break;
		    }
		    continue;
		}