Tcl Source Code

Check-in [4c5d9c6963]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:fixes [408568] "variable substitution parsing limited to ASCII alphanumerics"
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-408568
Files: files | file ages | folders
SHA3-256: 4c5d9c6963a24454e4096c618e02e5edbd4ca3340bef7dda1f945c4616d33787
User & Date: sebres 2019-03-08 00:44:23
Context
2019-03-08
01:17
fixed test parse-12.26 - variable now Leaf check-in: 32568cdfa1 user: sebres tags: bug-408568
00:44
fixes [408568] "variable substitution parsing limited to ASCII alphanumerics" check-in: 4c5d9c6963 user: sebres tags: bug-408568
00:34
test case for [408568] "variable substitution parsing limited to ASCII alphanumerics": illustrating ... check-in: 745068c247 user: sebres tags: bug-408568
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclInt.h.

2576
2577
2578
2579
2580
2581
2582

2583
2584
2585
2586
2587
2588
2589
MODULE_SCOPE Tcl_Obj *	TclGetProcessGlobalValue(ProcessGlobalValue *pgvPtr);
MODULE_SCOPE const char *TclGetSrcInfoForCmd(Interp *iPtr, int *lenPtr);
MODULE_SCOPE char *	TclGetStringStorage(Tcl_Obj *objPtr,
			    unsigned int *sizePtr);
MODULE_SCOPE int	TclGlob(Tcl_Interp *interp, char *pattern,
			    Tcl_Obj *unquotedPrefix, int globFlags,
			    Tcl_GlobTypeData *types);

MODULE_SCOPE int	TclIncrObj(Tcl_Interp *interp, Tcl_Obj *valuePtr,
			    Tcl_Obj *incrPtr);
MODULE_SCOPE Tcl_Obj *	TclIncrObjVar2(Tcl_Interp *interp, Tcl_Obj *part1Ptr,
			    Tcl_Obj *part2Ptr, Tcl_Obj *incrPtr, int flags);
MODULE_SCOPE int	TclInfoExistsCmd(ClientData dummy, Tcl_Interp *interp,
			    int objc, Tcl_Obj *const objv[]);
MODULE_SCOPE Tcl_Obj *	TclInfoFrame(Tcl_Interp *interp, CmdFrame *framePtr);






>







2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
MODULE_SCOPE Tcl_Obj *	TclGetProcessGlobalValue(ProcessGlobalValue *pgvPtr);
MODULE_SCOPE const char *TclGetSrcInfoForCmd(Interp *iPtr, int *lenPtr);
MODULE_SCOPE char *	TclGetStringStorage(Tcl_Obj *objPtr,
			    unsigned int *sizePtr);
MODULE_SCOPE int	TclGlob(Tcl_Interp *interp, char *pattern,
			    Tcl_Obj *unquotedPrefix, int globFlags,
			    Tcl_GlobTypeData *types);
MODULE_SCOPE size_t	TclFindUtfBarewordEnd(const char *src, size_t numBytes);
MODULE_SCOPE int	TclIncrObj(Tcl_Interp *interp, Tcl_Obj *valuePtr,
			    Tcl_Obj *incrPtr);
MODULE_SCOPE Tcl_Obj *	TclIncrObjVar2(Tcl_Interp *interp, Tcl_Obj *part1Ptr,
			    Tcl_Obj *part2Ptr, Tcl_Obj *incrPtr, int flags);
MODULE_SCOPE int	TclInfoExistsCmd(ClientData dummy, Tcl_Interp *interp,
			    int objc, Tcl_Obj *const objv[]);
MODULE_SCOPE Tcl_Obj *	TclInfoFrame(Tcl_Interp *interp, CmdFrame *framePtr);

Changes to generic/tclParse.c.

624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
...
661
662
663
664
665
666
667














































668
669
670
671
672
673
674
....
1466
1467
1468
1469
1470
1471
1472
1473

1474
1475
1476


1477
1478
1479
1480
1481
1482
1483
{
    return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n';
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclIsBareword--
 *
 *	Report whether byte is one that can be part of a "bareword".
 *	This concept is named in expression parsing, where it determines
 *	what can be a legal function name, but is the same definition used
 *	in determining what variable names can be parsed as variable
 *	substitutions without the benefit of enclosing braces.  The set of
 *	ASCII chars that are accepted are the numeric chars ('0'-'9'),
................................................................................
	return 1;
    }
    if (byte < 'A' || byte > 'Z') {
	return 0;
    }
    return 1;
}














































 
/*
 *----------------------------------------------------------------------
 *
 * ParseWhiteSpace --
 *
 *	Scans up to numBytes bytes starting at src, consuming white space
................................................................................
	src++;
    } else {
	tokenPtr->type = TCL_TOKEN_TEXT;
	tokenPtr->start = src;
	tokenPtr->numComponents = 0;

	while (numBytes) {
	    if (TclIsBareword(*src)) {

		src += 1;
		numBytes -= 1;
		continue;


	    }
	    if ((src[0] == ':') && (numBytes != 1) && (src[1] == ':')) {
		src += 2;
		numBytes -= 2;
		while (numBytes && (*src == ':')) {
		    src++;
		    numBytes--;






|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
>
|
|
<
>
>







624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
...
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
....
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522

1523
1524
1525
1526
1527
1528
1529
1530
1531
{
    return CHAR_TYPE(byte) & (TYPE_SPACE) || byte == '\n';
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclIsBareword --
 *
 *	Report whether byte is one that can be part of a "bareword".
 *	This concept is named in expression parsing, where it determines
 *	what can be a legal function name, but is the same definition used
 *	in determining what variable names can be parsed as variable
 *	substitutions without the benefit of enclosing braces.  The set of
 *	ASCII chars that are accepted are the numeric chars ('0'-'9'),
................................................................................
	return 1;
    }
    if (byte < 'A' || byte > 'Z') {
	return 0;
    }
    return 1;
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclFindUtfBarewordEnd --
 *
 *	Scan src to the end of a "bareword". The chars accepted are the
 *	single-byte chars recognized by TclIsBareword (alphanumerics and
 *	underscore) plus any multi-byte UTF-8 sequence that decodes to a
 *	char for which Tcl_UniCharIsAlnum reports true.
 *
 * Results:
 *	Returns the length of the bareword in bytes, i.e. the offset of the
 *	first byte after it; 0 if src does not start with a bareword char.
 *	The result never exceeds numBytes.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

size_t
TclFindUtfBarewordEnd(
    const char *src,		/* First byte to examine. */
    size_t numBytes)		/* Number of bytes available at src. */
{
    size_t p = 0;		/* Offset of the next byte to examine. */
    size_t l;			/* Byte length of the char just decoded. */
    Tcl_UniChar ch = 0;		/* Decoded char; never left indeterminate. */

    while (p < numBytes) {
	/* Fast path for single-byte chars: 0-9, A-Z, a-z, _ */
	if (TclIsBareword(src[p])) {
	    p++;
	    continue;
	}

	/*
	 * Any other byte without the high bit set ends the word. A byte
	 * with the high bit set belongs to a multi-byte sequence, which
	 * cannot be complete if it is the last byte of the range, so stop
	 * before decoding it. The test must use the bytes remaining
	 * (numBytes - p); the total length does not change in this loop.
	 */
	if (!(src[p] & 0x80) || numBytes - p <= 1) {
	    break;
	}

	/*
	 * Decode one char and accept it only if it is alphanumeric and lies
	 * completely inside the range. Sequences of three or more bytes
	 * that are cut off by numBytes are rejected only after decoding.
	 * NOTE(review): that presumes the bytes right after the range are
	 * readable (e.g. NUL-terminated input) - confirm for all callers.
	 */
	l = TclUtfToUniChar(src+p, &ch);
	if (!Tcl_UniCharIsAlnum(ch) || p+l > numBytes) {
	    break;
	}
	p += l;
    }
    return p;
}
 
/*
 *----------------------------------------------------------------------
 *
 * ParseWhiteSpace --
 *
 *	Scans up to numBytes bytes starting at src, consuming white space
................................................................................
	src++;
    } else {
	tokenPtr->type = TCL_TOKEN_TEXT;
	tokenPtr->start = src;
	tokenPtr->numComponents = 0;

	while (numBytes) {
	    size_t wordlen = TclFindUtfBarewordEnd(src, numBytes);

	    src += wordlen;
	    numBytes -= wordlen;

	    if (!numBytes) {
		break;
	    }
	    if ((src[0] == ':') && (numBytes != 1) && (src[1] == ':')) {
		src += 2;
		numBytes -= 2;
		while (numBytes && (*src == ':')) {
		    src++;
		    numBytes--;