Tcl Source Code

Check-in [b3969d0fab]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:remove unexpected panic by TclTrimLeft/TclTrimRight, handling rewritten: - instead of check the NTS-byte after string (may cause segfault by access violation if out of range), it checks now the end-character is well-formed utf8-sequence; - both work well now on non-NTS strings (without null character at end); - additionally fixed wrong offsets (trim-length if last char malformed in case of out of range); new function TclTrim introduced (as combination of TclTrimLeft/TclTrimRight).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | sebres-8-5-fix-trim-on-mailformed
Files: files | file ages | folders
SHA3-256: b3969d0fab1963600a55cfd357e0563969e4fa5c4df415c87b9a69bbbe1a142e
User & Date: sebres 2018-03-13 18:52:45
References
2018-03-13
18:59 Ticket [de243b694d] Several trim mechanisms cause segfault / wrong check for NTS status still Open with 4 other changes artifact: 3b8e06c0e7 user: sebres
Context
2018-03-13
23:56
A few minor revisions. Closed-Leaf check-in: d8df7220e3 user: dgp tags: sebres-8-5-fix-trim-on-mailformed
18:52
remove unexpected panic by TclTrimLeft/TclTrimRight, handling rewritten: - instead of check the NTS-... check-in: b3969d0fab user: sebres tags: sebres-8-5-fix-trim-on-mailformed
2018-03-05
17:37
fixes [1873ea0ee4f01b26]: wrong initialization of std-channels, if no std-handles available at all (... check-in: 041bc74867 user: sebres tags: core-8-5-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
	length2 = strlen(string2);
    } else {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
	return TCL_ERROR;
    }
    string1 = TclGetStringFromObj(objv[1], &length1);

    triml = TclTrimLeft(string1, length1, string2, length2);
    trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);

    Tcl_SetObjResult(interp,
	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
    return TCL_OK;
}
 
/*






|
<







3123
3124
3125
3126
3127
3128
3129
3130

3131
3132
3133
3134
3135
3136
3137
	length2 = strlen(string2);
    } else {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
	return TCL_ERROR;
    }
    string1 = TclGetStringFromObj(objv[1], &length1);

    triml = TclTrim(string1, length1, string2, length2, &trimr);


    Tcl_SetObjResult(interp,
	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
    return TCL_OK;
}
 
/*

Changes to generic/tclInt.h.

2743
2744
2745
2746
2747
2748
2749


2750
2751
2752
2753
2754
2755
2756
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, CONST char *outerScript);
MODULE_SCOPE void	TclTransferResult(Tcl_Interp *sourceInterp, int result,
			    Tcl_Interp *targetInterp);


MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,






>
>







2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
			    int count, int *tokensLeftPtr, int line,
			    int *clNextOuter, CONST char *outerScript);
MODULE_SCOPE void	TclTransferResult(Tcl_Interp *sourceInterp, int result,
			    Tcl_Interp *targetInterp);
MODULE_SCOPE int	TclTrim(const char *bytes, int numBytes,
			    const char *trim, int numTrim, int *trimRight);
MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,

Changes to generic/tclUtil.c.

1495
1496
1497
1498
1499
1500
1501





































1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
....
1559
1560
1561
1562
1563
1564
1565





























1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
....
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632




















































































1633
1634
1635
1636
1637
1638
1639
....
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
....
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
    TclUtfToUniChar(buf, &ch);
    return (char) ch;
}
 
/*
 *----------------------------------------------------------------------
 *





































 * TclTrimRight --
 *	Takes two counted strings in the Tcl encoding which must both be
 *	null terminated.  Conceptually trims from the right side of the

 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the end of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes + numBytes;
    int pInc;

    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
	Tcl_Panic("TclTrimRight works only on null-terminated strings");
    }

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	const char *q = trim;
	int bytesLeft = numTrim;

	p = Tcl_UtfPrev(p, bytes);
................................................................................
	    p += pInc;
	    break;
	}
    } while (p > bytes);

    return numBytes - (p - bytes);
}





























 
/*
 *----------------------------------------------------------------------
 *
 * TclTrimLeft --
 *	Takes two counted strings in the Tcl encoding which must both be
 *	null terminated.  Conceptually trims from the left side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes;

    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
    }

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	int pInc = TclUtfToUniChar(p, &ch1);
	const char *q = trim;
	int bytesLeft = numTrim;

................................................................................
	if (bytesLeft == 0) {
	    /* No match; trim task done; *p is first non-trimmed char */
	    break;
	}

	p += pInc;
	numBytes -= pInc;
    } while (numBytes);

    return p - bytes;
}




















































































 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_Concat --
 *
 *	Concatenate a set of strings into a single large string.
................................................................................
	 */
	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
    }
    /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
    result = (char *) ckalloc((unsigned) (bytesNeeded + argc));

    for (p = result, i = 0;  i < argc;  i++) {
	int trim, elemLength;
	const char *element;

	element = argv[i];
	elemLength = strlen(argv[i]);

	/* Trim away the leading whitespace */
	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	element += trim;
	elemLength -= trim;

	/*
	 * Trim away the trailing whitespace.  Do not permit trimming
	 * to expose a final backslash character.
	 */

	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	trim -= trim && (element[elemLength - trim - 1] == '\\');
	elemLength -= trim;

	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */
................................................................................
     * string append algorithm.  When that fails it will report the error.
     */
    TclNewObj(resPtr);
    Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
    Tcl_SetObjLength(resPtr, 0);

    for (i = 0;  i < objc;  i++) {
	int trim;

	element = TclGetStringFromObj(objv[i], &elemLength);

	/* Trim away the leading whitespace */
	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	element += trim;
	elemLength -= trim;

	/*
	 * Trim away the trailing whitespace.  Do not permit trimming
	 * to expose a final backslash character.
	 */

	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
	trim -= trim && (element[elemLength - trim - 1] == '\\');
	elemLength -= trim;

	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
<
>











|
|








<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
|











|
|







<
<
<
<
<
<
<
<
<







 







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|





|
|
|
|
<
<
<
|
<
<
<
|
<







 







|



|
|
|
|
<
<
<
|
<
<
<
|
<







1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540

1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562









1563
1564
1565
1566
1567
1568
1569
....
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649









1650
1651
1652
1653
1654
1655
1656
....
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
....
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831



1832



1833

1834
1835
1836
1837
1838
1839
1840
....
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967



1968



1969

1970
1971
1972
1973
1974
1975
1976
    TclUtfToUniChar(buf, &ch);
    return (char) ch;
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclUtfWellFormedEnd --
 *	Checks the end of utf string is malformed, if yes - wraps bytes
 *	to the given buffer (as well-formed NTS string).
 *
 * Results:
 *	The bytes with well-formed end of the string.
 *
 * Side effects:
 *	Buffer (DString) may be allocated, so must be released.
 *
 *----------------------------------------------------------------------
 */

static inline const char*
TclUtfWellFormedEnd(
    Tcl_DString *buffer,	/* Buffer used to hold well-formed string. */
    CONST char *bytes,		/* Pointer to the beginning of the string. */
    int length)			/* Length of the string. */
{
    CONST char *l = bytes + length;
    CONST char *p = Tcl_UtfPrev(l, bytes);

    if (Tcl_UtfCharComplete(p, l - p)) {
	buffer->string = buffer->staticSpace;
	return bytes;
    }
    /* 
     * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
     * avoid segfault by access violation out of range.
     */
    Tcl_DStringInit(buffer);
    Tcl_DStringAppend(buffer, bytes, length);
    return Tcl_DStringValue(buffer);
}
/*
 *----------------------------------------------------------------------
 *
 * TclTrimRight --
 *	Takes two counted strings in the Tcl encoding.  Conceptually

 *	finds the sub string (offset) to trim from the right side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the end of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

static inline int
TrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes + numBytes;
    int pInc;










    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	const char *q = trim;
	int bytesLeft = numTrim;

	p = Tcl_UtfPrev(p, bytes);
................................................................................
	    p += pInc;
	    break;
	}
    } while (p > bytes);

    return numBytes - (p - bytes);
}

int
TclTrimRight(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    int res;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);

    res = TrimRight(bytes, numBytes, trim, numTrim);
    if (res > numBytes) {
	res = numBytes;
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return res;
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclTrimLeft --
 *	Takes two counted strings in the Tcl encoding.  Conceptually
 *	finds the sub string (offset) to trim from the left side of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

static inline int
TrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    const char *p = bytes;










    /* Outer loop: iterate over string to be trimmed */
    do {
	Tcl_UniChar ch1;
	int pInc = TclUtfToUniChar(p, &ch1);
	const char *q = trim;
	int bytesLeft = numTrim;

................................................................................
	if (bytesLeft == 0) {
	    /* No match; trim task done; *p is first non-trimmed char */
	    break;
	}

	p += pInc;
	numBytes -= pInc;
    } while (numBytes > 0);

    return p - bytes;
}

int
TclTrimLeft(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim)	/* ...and its length in bytes */
{
    int res;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);

    res = TrimLeft(bytes, numBytes, trim, numTrim);
    if (res > numBytes) {
	res = numBytes;
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return res;
}
 
/*
 *----------------------------------------------------------------------
 *
 * TclTrim --
 *	Finds the sub string (offset) to trim from both sides of the
 *	first string all characters found in the second string.
 *
 * Results:
 *	The number of bytes to be removed from the start of the string
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
TclTrim(
    const char *bytes,	/* String to be trimmed... */
    int numBytes,	/* ...and its length in bytes */
    const char *trim,	/* String of trim characters... */
    int numTrim,	/* ...and its length in bytes */
    int *trimRight)		/* Offset from the end of the string. */
{
    int trimLeft;
    Tcl_DString bytesBuf, trimBuf;

    /* Empty strings -> nothing to do */
    if ((numBytes == 0) || (numTrim == 0)) {
	return 0;
    }

    bytes = TclUtfWellFormedEnd(&bytesBuf, bytes, numBytes);
    trim = TclUtfWellFormedEnd(&trimBuf, trim, numTrim);

    trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
    if (trimLeft > numBytes) {
	trimLeft = numBytes;
    }
    numBytes -= trimLeft;
    *trimRight = 0;
    if (numBytes) {
	bytes += trimLeft;
	*trimRight = TrimRight(bytes, numBytes, trim, numTrim);
	if (*trimRight > numBytes) {
	    *trimRight = numBytes;
	}
    }

    Tcl_DStringFree(&bytesBuf);
    Tcl_DStringFree(&trimBuf);

    return trimLeft;
}
 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_Concat --
 *
 *	Concatenate a set of strings into a single large string.
................................................................................
	 */
	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
    }
    /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
    result = (char *) ckalloc((unsigned) (bytesNeeded + argc));

    for (p = result, i = 0;  i < argc;  i++) {
	int triml, trimr, elemLength;
	const char *element;

	element = argv[i];
	elemLength = strlen(argv[i]);

	/* Trim away the leading/trailing whitespace. */
	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
	element += triml;
	elemLength -= triml + trimr;



	/* Do not permit trimming to expose a final backslash character. */



	elemLength += trimr && (element[elemLength - 1] == '\\');


	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */
................................................................................
     * string append algorithm.  When that fails it will report the error.
     */
    TclNewObj(resPtr);
    Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
    Tcl_SetObjLength(resPtr, 0);

    for (i = 0;  i < objc;  i++) {
	int triml, trimr;

	element = TclGetStringFromObj(objv[i], &elemLength);

	/* Trim away the leading/trailing whitespace. */
	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
	element += triml;
	elemLength -= triml + trimr;



	/* Do not permit trimming to expose a final backslash character. */



	elemLength += trimr && (element[elemLength - 1] == '\\');


	/* If we're left with empty element after trimming, do nothing */
	if (elemLength == 0) {
	    continue;
	}

	/* Append to the result with space if needed */