Tcl Source Code

Check-in [e2a4fef6e1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make TclTrim* routines robust against some malformation in inputs. Better than access violations and panics!
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | core-8-5-branch
Files: files | file ages | folders
SHA3-256: e2a4fef6e100cfcf1012e4e6689e826988e2106c66d1ee94b8dee27073395c59
User & Date: dgp 2018-03-14 00:03:59.081
Context
2018-03-14
00:53
merge 8.5 check-in: 4fd30be85b user: dgp tags: core-8-6-branch
00:45
TclTrim must write to *trimRight even when making a quick exit. check-in: 8b3e6a3ee5 user: dgp tags: core-8-5-branch
00:03
Make TclTrim* routines robust against some malformation in inputs. Better than access violations and... check-in: e2a4fef6e1 user: dgp tags: core-8-5-branch
2018-03-13
23:56
A few minor revisions. Closed-Leaf check-in: d8df7220e3 user: dgp tags: sebres-8-5-fix-trim-on-mailformed
03:15
Tidy up and comment [string replace] and its corner cases. check-in: dfe3934edb user: dgp tags: core-8-5-branch
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/tclCmdMZ.c.
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
	length2 = strlen(string2);
    } else {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
	return TCL_ERROR;
    }
    string1 = TclGetStringFromObj(objv[1], &length1);

    triml = TclTrimLeft(string1, length1, string2, length2);
    trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);

    Tcl_SetObjResult(interp,
	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
    return TCL_OK;
}

/*







|
<







3136
3137
3138
3139
3140
3141
3142
3143

3144
3145
3146
3147
3148
3149
3150
	length2 = strlen(string2);
    } else {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
	return TCL_ERROR;
    }
    string1 = TclGetStringFromObj(objv[1], &length1);

    triml = TclTrim(string1, length1, string2, length2, &trimr);


    Tcl_SetObjResult(interp,
	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
    return TCL_OK;
}

/*
Changes to generic/tclInt.h.
2743
2744
2745
2746
2747
2748
2749


2750
2751
2752
2753
2754
2755
2756
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, CONST char *outerScript);
MODULE_SCOPE void	TclTransferResult(Tcl_Interp *sourceInterp, int result,
			    Tcl_Interp *targetInterp);


MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,







>
>







2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, CONST char *outerScript);
MODULE_SCOPE void	TclTransferResult(Tcl_Interp *sourceInterp, int result,
			    Tcl_Interp *targetInterp);
MODULE_SCOPE int	TclTrim(const char *bytes, int numBytes,
			    const char *trim, int numTrim, int *trimRight);
MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,
Changes to generic/tclUtil.c.
1495
1496
1497
1498
1499
1500
1501




































1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
    TclUtfToUniChar(buf, &ch);
    return (char) ch;
}

/*
 *----------------------------------------------------------------------
 *




































 * TclTrimRight --
 *	Takes two counted strings in the Tcl encoding which must both be
 *	null terminated.  Conceptually trims from the right side of the

 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the end of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes + numBytes;
    int pInc;

    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
	Tcl_Panic("TclTrimRight works only on null-terminated strings");
    }

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	const char *q = trim;
	int bytesLeft = numTrim;

	p = Tcl_UtfPrev(p, bytes);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
<
>











|
|








<
<
<
<
<
<
<
<
<







1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539

1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561









1562
1563
1564
1565
1566
1567
1568
    TclUtfToUniChar(buf, &ch);
    return (char) ch;
}

/*
 *----------------------------------------------------------------------
 *
 * UtfWellFormedEnd --
 *	Checks the end of utf string is malformed, if yes - wraps bytes
 *	to the given buffer (as well-formed NTS string).  The buffer
 *	argument should be initialized by the caller and ready to use.
 *
 * Results:
 *	The bytes with well-formed end of the string.
 *
 * Side effects:
 *	Buffer (DString) may be allocated, so must be released.
 *
 *----------------------------------------------------------------------
 */

static inline const char*
UtfWellFormedEnd(
    Tcl_DString *buffer,	/* Buffer used to hold well-formed string. */
    CONST char *bytes,		/* Pointer to the beginning of the string. */
    int length)			/* Length of the string. */
{
    CONST char *l = bytes + length;
    CONST char *p = Tcl_UtfPrev(l, bytes);

    if (Tcl_UtfCharComplete(p, l - p)) {
	return bytes;
    }
    /* 
     * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
     * avoid segfault by access violation out of range.
     */
    Tcl_DStringAppend(buffer, bytes, length);
    return Tcl_DStringValue(buffer);
}
/*
 *----------------------------------------------------------------------
 *
 * TclTrimRight --
 *	Takes two counted strings in the Tcl encoding.  Conceptually

 *	finds the sub string (offset) to trim from the right side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the end of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

static inline int
TrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes + numBytes;
    int pInc;










    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	const char *q = trim;
	int bytesLeft = numTrim;

	p = Tcl_UtfPrev(p, bytes);
1559
1560
1561
1562
1563
1564
1565































1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
	    p += pInc;
	    break;
	}
    } while (p > bytes);

    return numBytes - (p - bytes);
}
































/*
 *----------------------------------------------------------------------
 *
 * TclTrimLeft --
 *	Takes two counted strings in the Tcl encoding which must both be
 *	null terminated.  Conceptually trims from the left side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes;

    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
    }

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	int pInc = TclUtfToUniChar(p, &ch1);
	const char *q = trim;
	int bytesLeft = numTrim;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
|











|
|







<
<
<
<
<
<
<
<
<







1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650









1651
1652
1653
1654
1655
1656
1657
	    p += pInc;
	    break;
	}
    } while (p > bytes);

    return numBytes - (p - bytes);
}

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    int res;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    Tcl_DStringInit(&bytesBuf);
    Tcl_DStringInit(&trimBuf);
    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);

    res = TrimRight(bytes, numBytes, trim, numTrim);
    if (res > numBytes) {
	res = numBytes;
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return res;
}

/*
 *----------------------------------------------------------------------
 *
 * TclTrimLeft --
 *	Takes two counted strings in the Tcl encoding.  Conceptually
 *	finds the sub string (offset) to trim from the left side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

static inline int
TrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes;










    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	int pInc = TclUtfToUniChar(p, &ch1);
	const char *q = trim;
	int bytesLeft = numTrim;

1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
























































































1633
1634
1635
1636
1637
1638
1639
	if (bytesLeft == 0) {
	    /* No match; trim task done; *p is first non-trimmed char */
	    break;
	}

	p += pInc;
	numBytes -= pInc;
    } while (numBytes);

    return p - bytes;
}

























































































/*
 *----------------------------------------------------------------------
 *
 * Tcl_Concat --
 *
 *	Concatenate a set of strings into a single large string.







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
	if (bytesLeft == 0) {
	    /* No match; trim task done; *p is first non-trimmed char */
	    break;
	}

	p += pInc;
	numBytes -= pInc;
    } while (numBytes > 0);

    return p - bytes;
}

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    int res;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    Tcl_DStringInit(&bytesBuf);
    Tcl_DStringInit(&trimBuf);
    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);

    res = TrimLeft(bytes, numBytes, trim, numTrim);
    if (res > numBytes) {
	res = numBytes;
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return res;
}

/*
 *----------------------------------------------------------------------
 *
 * TclTrim --
 *	Finds the sub string (offset) to trim from both sides of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrim(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim,	/* ...and its length in bytes */
    int *trimRight)		/* Offset from the end of the string. */
{
    int trimLeft;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    Tcl_DStringInit(&bytesBuf);
    Tcl_DStringInit(&trimBuf);
    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);

    trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
    if (trimLeft > numBytes) {
	trimLeft = numBytes;
    }
    numBytes -= trimLeft;
    *trimRight = 0;
    if (numBytes) {
	bytes += trimLeft;
	*trimRight = TrimRight(bytes, numBytes, trim, numTrim);
	if (*trimRight > numBytes) {
	    *trimRight = numBytes;
	}
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return trimLeft;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_Concat --
 *
 *	Concatenate a set of strings into a single large string.
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
	 */
	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
    }
    /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
    result = (char *) ckalloc((unsigned) (bytesNeeded + argc));

    for (p = result, i = 0;  i < argc;  i++) {
	int trim, elemLength;
	const char *element;

	element = argv[i];
	elemLength = strlen(argv[i]);

	/* Trim away the leading whitespace */
	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	element += trim;
	elemLength -= trim;

	/*
	 * Trim away the trailing whitespace.  Do not permit trimming
	 * to expose a final backslash character.
	 */

	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	trim -= trim && (element[elemLength - trim - 1] == '\\');
	elemLength -= trim;

	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */







|





|
|
|
|
<
<
<
|
<
<
<
|
<







1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836



1837



1838

1839
1840
1841
1842
1843
1844
1845
	 */
	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
    }
    /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
    result = (char *) ckalloc((unsigned) (bytesNeeded + argc));

    for (p = result, i = 0;  i < argc;  i++) {
	int triml, trimr, elemLength;
	const char *element;

	element = argv[i];
	elemLength = strlen(argv[i]);

	/* Trim away the leading/trailing whitespace. */
	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
	element += triml;
	elemLength -= triml + trimr;



	/* Do not permit trimming to expose a final backslash character. */



	elemLength += trimr && (element[elemLength - 1] == '\\');


	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
     * string append algorithm.  When that fails it will report the error.
     */
    TclNewObj(resPtr);
    Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
    Tcl_SetObjLength(resPtr, 0);

    for (i = 0;  i < objc;  i++) {
	int trim;

	element = TclGetStringFromObj(objv[i], &elemLength);

	/* Trim away the leading whitespace */
	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	element += trim;
	elemLength -= trim;

	/*
	 * Trim away the trailing whitespace.  Do not permit trimming
	 * to expose a final backslash character.
	 */

	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	trim -= trim && (element[elemLength - trim - 1] == '\\');
	elemLength -= trim;

	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */







|



|
|
|
|
<
<
<
|
<
<
<
|
<







1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972



1973



1974

1975
1976
1977
1978
1979
1980
1981
     * string append algorithm.  When that fails it will report the error.
     */
    TclNewObj(resPtr);
    Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
    Tcl_SetObjLength(resPtr, 0);

    for (i = 0;  i < objc;  i++) {
	int triml, trimr;

	element = TclGetStringFromObj(objv[i], &elemLength);

	/* Trim away the leading/trailing whitespace. */
	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
	element += triml;
	elemLength -= triml + trimr;



	/* Do not permit trimming to expose a final backslash character. */



	elemLength += trimr && (element[elemLength - 1] == '\\');


	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */