Tcl Source Code

Check-in [6758341b2f]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add test-cases for bug [11ae2be95dac9417], and make a start fixing it. Almost works.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-389
Files: files | file ages | folders
SHA3-256: 6758341b2f5181c3171d95c3fe47736f8943ec8dc2a602b64e264bd190880175
User & Date: jan.nijtmans 2018-01-11 14:11:38
Context
2018-01-11
15:59
merge core-8-branch. Remove left-over debugging code. Test-case string-5.21 still fails. check-in: ad24e66451 user: jan.nijtmans tags: tip-389
14:11
Add test-cases for bug [11ae2be95dac9417], and make a start fixing it. Almost works. check-in: 6758341b2f user: jan.nijtmans tags: tip-389
2018-01-10
14:03
merge core-8-branch check-in: d43a6a594e user: jan.nijtmans tags: tip-389
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447








1448
1449
1450
1451
1452
1453
1454
    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string charIndex");
	return TCL_ERROR;
    }

    /*
     * Get the char length to calulate what 'end' means.
     */

    length = Tcl_GetCharLength(objv[1]);
    if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }

    if ((index >= 0) && (index < length)) {
	Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index);









	/*
	 * If we have a ByteArray object, we're careful to generate a new
	 * bytearray for a result.
	 */

	if (TclIsPureByteArray(objv[1])) {






|








|
>
>
>
>
>
>
>
>







1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string charIndex");
	return TCL_ERROR;
    }

    /*
     * Get the char length to calculate what 'end' means.
     */

    length = Tcl_GetCharLength(objv[1]);
    if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }

    if ((index >= 0) && (index < length)) {
	int ch = Tcl_GetUniChar(objv[1], index);

	if (ch >= 0x10000) {
	    printf("HI: %x\n", ch);
	}
	if (ch == -1) {
	    printf("LO: %x\n", ch);
	    return TCL_OK;
	}

	/*
	 * If we have a ByteArray object, we're careful to generate a new
	 * bytearray for a result.
	 */

	if (TclIsPureByteArray(objv[1])) {

Changes to generic/tclStringObj.c.

456
457
458
459
460
461
462
463

464
465
466
467
468
469
470
...
474
475
476
477
478
479
480

481
482
483
484
485
486
487
...
508
509
510
511
512
513
514

515















516
517
518
519
520
521
522
...
652
653
654
655
656
657
658











659
660
661
662
663
664
665
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. The index
 *	is assumed to be in the appropriate range.

 *
 * Results:
 *	Returns the index'th Unicode character in the Object.
 *
 * Side effects:
 *	Fills unichar with the index'th Unicode character.
 *
................................................................................
int
Tcl_GetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    int index)			/* Get the index'th Unicode character. */
{
    String *stringPtr;


    /*
     * Optimize the case where we're really dealing with a bytearray object
     * without string representation; we don't need to convert to a string to
     * perform the indexing operation.
     */

................................................................................
	}
	if (stringPtr->numChars == objPtr->length) {
	    return (Tcl_UniChar) objPtr->bytes[index];
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }

    return (int) stringPtr->unicode[index];















}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUnicode --
 *
................................................................................
	    stringPtr->numChars = newObjPtr->length;
	    return newObjPtr;
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }












    return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_SetStringObj --






|
>







 







>







 







>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
...
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
...
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
...
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. The index
 *	is assumed to be in the appropriate range. If index references a lower
 *	surrogate preceded by a higher surrogate, the result = -1;
 *
 * Results:
 *	Returns the index'th Unicode character in the Object.
 *
 * Side effects:
 *	Fills unichar with the index'th Unicode character.
 *
................................................................................
int
Tcl_GetUniChar(
    Tcl_Obj *objPtr,		/* The object to get the Unicode charater
				 * from. */
    int index)			/* Get the index'th Unicode character. */
{
    String *stringPtr;
    int ch;

    /*
     * Optimize the case where we're really dealing with a bytearray object
     * without string representation; we don't need to convert to a string to
     * perform the indexing operation.
     */

................................................................................
	}
	if (stringPtr->numChars == objPtr->length) {
	    return (Tcl_UniChar) objPtr->bytes[index];
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }

	ch = stringPtr->unicode[index];
#if TCL_UTF_MAX == 4
	/* See: bug [11ae2be95dac9417] */
	if ((ch&0xF800) == 0xD800) {
	    if (ch&0x400) {
		if ((index > 0) && ((stringPtr->unicode[index-1]&0xFC00) == 0xD800)) {
		    ch = -1; /* low surrogate preceded by high surrogate */
		}
	    } else if ((++index < stringPtr->numChars)
		    && ((stringPtr->unicode[index]&0xFC00) == 0xDC00)) {
		/* high surrogate followed by low surrogate */
		ch = (((ch & 0x3FF) << 10) | (stringPtr->unicode[index] & 0x3FF)) + 0x10000;
	    }
	}
#endif
    return ch;
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUnicode --
 *
................................................................................
	    stringPtr->numChars = newObjPtr->length;
	    return newObjPtr;
	}
	FillUnicodeRep(objPtr);
	stringPtr = GET_STRING(objPtr);
    }

#if TCL_UTF_MAX == 4
	/* See: bug [11ae2be95dac9417] */
	if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00)
		&& ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) {
	    ++first;
	}
	if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00)
		&& ((stringPtr->unicode[last]&0xFC00) == 0xD800)) {
	    ++last;
	}
#endif
    return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_SetStringObj --

Changes to tests/string.test.

20
21
22
23
24
25
26

27
28
29
30
31
32
33
...
298
299
300
301
302
303
304



305
306
307
308
309
310
311
....
1284
1285
1286
1287
1288
1289
1290



1291
1292
1293
1294
1295
1296
1297
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]

# Some tests require the testobj command

testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]


# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]

proc representationpoke s {
    set r [::tcl::unsupported::representation $s]
    list [lindex $r 3] [string match {*, string representation "*"} $r]
................................................................................
} -match glob -result {1 {*invalid octal number*}}
test string-5.19 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] -1
} {}
test string-5.20 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] 20
} {}





proc largest_int {} {
    # This will give us what the largest valid int on this machine is,
    # so we can test for overflow properly below on >32 bit systems
    set int 1
    set exp 7; # assume we get at least 8 bits
................................................................................
    list $input_hex $rxBuffer_hex $rxCRC_hex
} {000341 000341 0341}
test string-12.22 {string range, shimmering binary/index} {
    set s 0000000001
    binary scan $s a* x
    string range $s $s end
} 000000001




test string-13.1 {string repeat} {
    list [catch {string repeat} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}
test string-13.2 {string repeat} {
    list [catch {string repeat abc 10 oops} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}






>







 







>
>
>







 







>
>
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
...
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
....
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
::tcltest::loadTestedCommands
catch [list package require -exact Tcltest [info patchlevel]]

# Some tests require the testobj command

testConstraint testobj [expr {[info commands testobj] != {}}]
testConstraint testindexobj [expr {[info commands testindexobj] != {}}]
testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]

# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]

proc representationpoke s {
    set r [::tcl::unsupported::representation $s]
    list [lindex $r 3] [string match {*, string representation "*"} $r]
................................................................................
} -match glob -result {1 {*invalid octal number*}}
test string-5.19 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] -1
} {}
test string-5.20 {string index, bytearray object out of bounds} {
    string index [binary format I* {0x50515253 0x52}] 20
} {}
test string-5.21 {string index, surrogates, bug [11ae2be95dac9417]} fullutf {
    list [string index a\U100000b 1] [string index a\U100000b 2] [string index a\U100000b 3]
} [list \U100000 {} b]


proc largest_int {} {
    # This will give us what the largest valid int on this machine is,
    # so we can test for overflow properly below on >32 bit systems
    set int 1
    set exp 7; # assume we get at least 8 bits
................................................................................
    list $input_hex $rxBuffer_hex $rxCRC_hex
} {000341 000341 0341}
test string-12.22 {string range, shimmering binary/index} {
    set s 0000000001
    binary scan $s a* x
    string range $s $s end
} 000000001
test string-12.23 {string range, surrogates, bug [11ae2be95dac9417]} fullutf {
    list [string range a\U100000b 1 1] [string range a\U100000b 2 2] [string range a\U100000b 3 3]
} [list \U100000 {} b]

test string-13.1 {string repeat} {
    list [catch {string repeat} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}
test string-13.2 {string repeat} {
    list [catch {string repeat abc 10 oops} msg] $msg
} {1 {wrong # args: should be "string repeat string count"}}