Tcl Source Code

Check-in [4a7b807856]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make Tcl_AppendObjToObj more efficient and avoid unnecessarily generating a string representation when the object to append to is the empty string.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | main
Files: files | file ages | folders
SHA3-256: 4a7b807856c2753acf673d66afd2e501d3a70d0bad67e7341ecf4b1a9760181a
User & Date: pooryorick 2023-04-21 20:32:38
References
2023-04-23
11:28
Add bytearray checking to TclCheckEmptyString(), and then use TclCheckEmptyString() in Tcl_AppendOb... check-in: a61fef8429 user: pooryorick tags: pyk-whatever
2023-04-22
16:29
Backout [4a7b807856], It breaks the build. See: [https://github.com/tcltk/tcl/actions/runs/477158685... check-in: 1d3415a44e user: jan.nijtmans tags: trunk, main
Context
2023-04-22
16:29
Backout [4a7b807856], It breaks the build. See: [https://github.com/tcltk/tcl/actions/runs/477158685... check-in: 1d3415a44e user: jan.nijtmans tags: trunk, main
2023-04-21
20:39
Merge 8.7 check-in: 9cbb0b212f user: jan.nijtmans tags: trunk, main
20:32
Make Tcl_AppendObjToObj more efficient and avoid unnecessarily generating a string representation wh... check-in: 4a7b807856 user: pooryorick tags: trunk, main
20:26
Add testcases check-in: aaf4d33fc7 user: jan.nijtmans tags: trunk, main
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to doc/StringObj.3.

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
returned as a new value. If negative, behave the same as if the
value was 0.
.AP Tcl_Size last in
The index of the last Unicode character in the Unicode range to be
returned as a new value. If negative, take all characters up to
the last one available.
.AP Tcl_Obj *objPtr in/out
Points to a value to manipulate.
.AP Tcl_Obj *appendObjPtr in
The value to append to \fIobjPtr\fR in \fBTcl_AppendObjToObj\fR.
.AP "Tcl_Size \&| int" *lengthPtr out
The location where \fBTcl_GetStringFromObj\fR will store the length
of a value's string representation. May be (int *)NULL when not used.
.AP "const char" *string in
Null-terminated string value to append to \fIobjPtr\fR.







|







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
returned as a new value. If negative, behave the same as if the
value was 0.
.AP Tcl_Size last in
The index of the last Unicode character in the Unicode range to be
returned as a new value. If negative, take all characters up to
the last one available.
.AP Tcl_Obj *objPtr in/out
A pointer to a value to read, or to an unshared value to modify.
.AP Tcl_Obj *appendObjPtr in
The value to append to \fIobjPtr\fR in \fBTcl_AppendObjToObj\fR.
.AP "Tcl_Size \&| int" *lengthPtr out
The location where \fBTcl_GetStringFromObj\fR will store the length
of a value's string representation. May be (int *)NULL when not used.
.AP "const char" *string in
Null-terminated string value to append to \fIobjPtr\fR.

Changes to generic/tclStringObj.c.

547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
 *	Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    Tcl_Size index)		/* Get the index'th Unicode character. */
{
    String *stringPtr;
    int ch;

    if (index < 0) {
	return -1;
    }

    /*
     * Optimize the case where we're really dealing with a ByteArray object
     * we don't need to convert to a string to perform the indexing operation.
     */

    if (TclIsPureByteArray(objPtr)) {
	Tcl_Size length = 0;
	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
	if (index >= length) {
		return -1;
	}

	return bytes[index];
    }

    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    if (stringPtr->hasUnicode == 0) {
	/*







|
<
|









|
|













|







547
548
549
550
551
552
553
554

555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
 *	Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetUniChar(
    Tcl_Obj *objPtr,	/* The object to get the Unicode character from. */

    Tcl_Size index)	/* Get the index'th Unicode character. */
{
    String *stringPtr;
    int ch;

    if (index < 0) {
	return -1;
    }

    /*
     * For a ByteArray object there is no need to convert to a string to
     * perform the indexing operation.
     */

    if (TclIsPureByteArray(objPtr)) {
	Tcl_Size length = 0;
	unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length);
	if (index >= length) {
		return -1;
	}

	return bytes[index];
    }

    /*
     * Must work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    if (stringPtr->hasUnicode == 0) {
	/*
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
    }
#endif
    return ch;
}

int
TclGetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    Tcl_Size index)		/* Get the index'th Unicode character. */
{
    int ch = 0;

    if (index < 0) {
	return -1;
    }








|
<
|







619
620
621
622
623
624
625
626

627
628
629
630
631
632
633
634
    }
#endif
    return ch;
}

int
TclGetUniChar(
    Tcl_Obj *objPtr,	/* The object to get the Unicode character from. */

    Tcl_Size index)	/* Get the index'th Unicode character. */
{
    int ch = 0;

    if (index < 0) {
	return -1;
    }

1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437

1438

1439

1440
1441
1442




1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_AppendObjToObj --
 *
 *	This function appends the string rep of one object to another.
 *	"objPtr" cannot be a shared object.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	The string rep of appendObjPtr is appended to the string
 *	representation of objPtr.
 *	IMPORTANT: This routine does not and MUST NOT shimmer appendObjPtr.
 *	Callers are counting on that.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_AppendObjToObj(
    Tcl_Obj *objPtr,		/* Points to the object to append to. */
    Tcl_Obj *appendObjPtr)	/* Object to append. */
{
    String *stringPtr;
    Tcl_Size length = 0, numChars;
    Tcl_Size appendNumChars = TCL_INDEX_NONE;
    const char *bytes;

    /*
     * Special case: second object is standard-empty is fast case. We know
     * that appending nothing to anything leaves that starting anything...
     */



    if (appendObjPtr->bytes == &tclEmptyString) {

	return;
    }





    /*
     * Handle append of one ByteArray object to another as a special case.
     * Note that we only do this when the objects are pure so that the
     * bytearray faithfully represent the true value; Otherwise appending the
     * byte arrays together could lose information;
     */

    if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
	    && TclIsPureByteArray(appendObjPtr)) {
	/*
	 * You might expect the code here to be
	 *
	 *  bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
	 *  TclAppendBytesToByteArray(objPtr, bytes, length);
	 *
	 * and essentially all of the time that would be fine. However, it
	 * would run into trouble in the case where objPtr and appendObjPtr
	 * point to the same thing. That may never be a good idea. It seems to







<
|
<





<
<
|







|
|






<
<
|
<
>
|
>
|
>



>
>
>
>
|
<
|
|
|
|

<
<

|







1399
1400
1401
1402
1403
1404
1405

1406

1407
1408
1409
1410
1411


1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427


1428

1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441

1442
1443
1444
1445
1446


1447
1448
1449
1450
1451
1452
1453
1454
1455
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_AppendObjToObj --

 *	Appends the value of appendObjPtr to objPtr, which must not be shared.

 *
 * Results:
 *	None.
 *
 * Side effects:


 *	IMPORTANT: Does not and MUST NOT shimmer appendObjPtr.
 *	Callers are counting on that.
 *
 *----------------------------------------------------------------------
 */

void
Tcl_AppendObjToObj(
    Tcl_Obj *objPtr,		/* Points to the value to append to. */
    Tcl_Obj *appendObjPtr)	/* The value to append. */
{
    String *stringPtr;
    Tcl_Size length = 0, numChars;
    Tcl_Size appendNumChars = TCL_INDEX_NONE;
    const char *bytes;



    if (appendObjPtr->bytes == &tclEmptyString) {

	return;
    }

    if (objPtr->bytes == &tclEmptyString) {
	TclSetDuplicateObj(objPtr, appendObjPtr);
	return;
    }

    if (
	TclIsPureByteArray(appendObjPtr)
	&& (TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString)
    ) {
	/*

	 * Both bytearray objects are pure.  Therefore they faithfully
	 * represent the true values, making it safe to append the second
	 * bytearray to the first.
	 */



	/*
	 * One might expect the code here to be
	 *
	 *  bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length);
	 *  TclAppendBytesToByteArray(objPtr, bytes, length);
	 *
	 * and essentially all of the time that would be fine. However, it
	 * would run into trouble in the case where objPtr and appendObjPtr
	 * point to the same thing. That may never be a good idea. It seems to
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
	Tcl_UniChar *dst;

	if (inPlace && !Tcl_IsShared(*objv)) {
	    Tcl_Size start;

	    objResultPtr = *objv++; objc--;

	    /* Ugly interface! Force resize of the unicode array. */
	    (void)Tcl_GetUnicodeFromObj(objResultPtr, &start);
	    Tcl_InvalidateStringRep(objResultPtr);
	    if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
		if (interp) {
		    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    	"concatenation failed: unable to alloc %"
			TCL_Z_MODIFIER "u bytes",







|







3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
	Tcl_UniChar *dst;

	if (inPlace && !Tcl_IsShared(*objv)) {
	    Tcl_Size start;

	    objResultPtr = *objv++; objc--;

	    /* Ugly interface! Force resize of the Unicode array. */
	    (void)Tcl_GetUnicodeFromObj(objResultPtr, &start);
	    Tcl_InvalidateStringRep(objResultPtr);
	    if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) {
		if (interp) {
		    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    	"concatenation failed: unable to alloc %"
			TCL_Z_MODIFIER "u bytes",
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
 *	Reallocates the String internal rep.
 *
 *---------------------------------------------------------------------------
 */

static void
FillUnicodeRep(
    Tcl_Obj *objPtr)		/* The object in which to fill the unicode
				 * rep. */
{
    /*
     * Only the numChars field of the String struct is read here.
     * NOTE(review): GET_STRING presumably yields objPtr's String internal
     * rep and assumes objPtr already has one -- confirm against the macro.
     */

    String *stringPtr = GET_STRING(objPtr);

    /*
     * This routine does no work of its own: it passes the object's own
     * string rep (bytes and length) together with the recorded character
     * count to ExtendUnicodeRepWithString.
     */

    ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length,
	    stringPtr->numChars);
}







|







4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
 *	Reallocates the String internal rep.
 *
 *---------------------------------------------------------------------------
 */

static void
FillUnicodeRep(
    Tcl_Obj *objPtr)		/* The object in which to fill the Unicode
				 * rep. */
{
    /*
     * Only the numChars field of the String struct is read here.
     * NOTE(review): GET_STRING presumably yields objPtr's String internal
     * rep and assumes objPtr already has one -- confirm against the macro.
     */

    String *stringPtr = GET_STRING(objPtr);

    /*
     * This routine does no work of its own: it passes the object's own
     * string rep (bytes and length) together with the recorded character
     * count to ExtendUnicodeRepWithString.
     */

    ExtendUnicodeRepWithString(objPtr, objPtr->bytes, objPtr->length,
	    stringPtr->numChars);
}