Tcl Source Code

Check-in [e2a4fef6e1]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make TclTrim* routines robust against some malformation in inputs. Better than access violations and panics!
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-5-branch
Files: files | file ages | folders
SHA3-256: e2a4fef6e100cfcf1012e4e6689e826988e2106c66d1ee94b8dee27073395c59
User & Date: dgp 2018-03-14 00:03:59
Context
2018-03-14
00:53
merge 8.5 check-in: 4fd30be85b user: dgp tags: core-8-6-branch
00:45
TclTrim must write to *trimRight even when making a quick exit. check-in: 8b3e6a3ee5 user: dgp tags: core-8-5-branch
00:03
Make TclTrim* routines robust against some malformation in inputs. Better than access violations and... check-in: e2a4fef6e1 user: dgp tags: core-8-5-branch
2018-03-13
23:56
A few minor revisions. Closed-Leaf check-in: d8df7220e3 user: dgp tags: sebres-8-5-fix-trim-on-mailformed
03:15
Tidy up and comment [string replace] and its corner cases. check-in: dfe3934edb user: dgp tags: core-8-5-branch
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

  3136   3136   	length2 = strlen(string2);
  3137   3137       } else {
  3138   3138   	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
  3139   3139   	return TCL_ERROR;
  3140   3140       }
  3141   3141       string1 = TclGetStringFromObj(objv[1], &length1);
  3142   3142   
  3143         -    triml = TclTrimLeft(string1, length1, string2, length2);
  3144         -    trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);
         3143  +    triml = TclTrim(string1, length1, string2, length2, &trimr);
  3145   3144   
  3146   3145       Tcl_SetObjResult(interp,
  3147   3146   	    Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
  3148   3147       return TCL_OK;
  3149   3148   }
  3150   3149   
  3151   3150   /*

Changes to generic/tclInt.h.

  2743   2743   			    Tcl_Obj *patternObj, int flags);
  2744   2744   MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
  2745   2745   MODULE_SCOPE int	TclSubstTokens(Tcl_Interp *interp, Tcl_Token *tokenPtr,
  2746   2746   			    int count, int *tokensLeftPtr, int line,
  2747   2747   			    int *clNextOuter, CONST char *outerScript);
  2748   2748   MODULE_SCOPE void	TclTransferResult(Tcl_Interp *sourceInterp, int result,
  2749   2749   			    Tcl_Interp *targetInterp);
         2750  +MODULE_SCOPE int	TclTrim(const char *bytes, int numBytes,
         2751  +			    const char *trim, int numTrim, int *trimRight);
  2750   2752   MODULE_SCOPE int	TclTrimLeft(const char *bytes, int numBytes,
  2751   2753   			    const char *trim, int numTrim);
  2752   2754   MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
  2753   2755   			    const char *trim, int numTrim);
  2754   2756   MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
  2755   2757   MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
  2756   2758   MODULE_SCOPE Tcl_PackageInitProc *TclpFindSymbol(Tcl_Interp *interp,

Changes to generic/tclUtil.c.

  1495   1495       TclUtfToUniChar(buf, &ch);
  1496   1496       return (char) ch;
  1497   1497   }
  1498   1498   
  1499   1499   /*
  1500   1500    *----------------------------------------------------------------------
  1501   1501    *
         1502  + * UtfWellFormedEnd --
         1503  + *	Checks the end of utf string is malformed, if yes - wraps bytes
         1504  + *	to the given buffer (as well-formed NTS string).  The buffer
         1505  + *	argument should be initialized by the caller and ready to use.
         1506  + *
         1507  + * Results:
         1508  + *	The bytes with well-formed end of the string.
         1509  + *
         1510  + * Side effects:
         1511  + *	Buffer (DString) may be allocated, so must be released.
         1512  + *
         1513  + *----------------------------------------------------------------------
         1514  + */
         1515  +
         1516  +static inline const char*
         1517  +UtfWellFormedEnd(
         1518  +    Tcl_DString *buffer,	/* Buffer used to hold well-formed string. */
         1519  +    CONST char *bytes,		/* Pointer to the beginning of the string. */
         1520  +    int length)			/* Length of the string. */
         1521  +{
         1522  +    CONST char *l = bytes + length;
         1523  +    CONST char *p = Tcl_UtfPrev(l, bytes);
         1524  +
         1525  +    if (Tcl_UtfCharComplete(p, l - p)) {
         1526  +	return bytes;
         1527  +    }
         1528  +    /* 
         1529  +     * Malformed utf-8 end, be sure we've NTS to safe compare of end-character,
         1530  +     * avoid segfault by access violation out of range.
         1531  +     */
         1532  +    Tcl_DStringAppend(buffer, bytes, length);
         1533  +    return Tcl_DStringValue(buffer);
         1534  +}
         1535  +/*
         1536  + *----------------------------------------------------------------------
         1537  + *
  1502   1538    * TclTrimRight --
  1503         - *	Takes two counted strings in the Tcl encoding which must both be
  1504         - *	null terminated.  Conceptually trims from the right side of the
         1539  + *	Takes two counted strings in the Tcl encoding.  Conceptually
         1540  + *	finds the sub string (offset) to trim from the right side of the
  1505   1541    *	first string all characters found in the second string.
  1506   1542    *
  1507   1543    * Results:
  1508   1544    *	The number of bytes to be removed from the end of the string.
  1509   1545    *
  1510   1546    * Side effects:
  1511   1547    *	None.
  1512   1548    *
  1513   1549    *----------------------------------------------------------------------
  1514   1550    */
  1515   1551   
  1516         -int
  1517         -TclTrimRight(
         1552  +static inline int
         1553  +TrimRight(
  1518   1554       const char *bytes,	/* String to be trimmed... */
  1519   1555       int numBytes,	/* ...and its length in bytes */
  1520   1556       const char *trim,	/* String of trim characters... */
  1521   1557       int numTrim)	/* ...and its length in bytes */
  1522   1558   {
  1523   1559       const char *p = bytes + numBytes;
  1524   1560       int pInc;
  1525   1561   
  1526         -    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
  1527         -	Tcl_Panic("TclTrimRight works only on null-terminated strings");
  1528         -    }
  1529         -
  1530         -    /* Empty strings -> nothing to do */
  1531         -    if ((numBytes == 0) || (numTrim == 0)) {
  1532         -	return 0;
  1533         -    }
  1534         -
  1535   1562       /* Outer loop: iterate over string to be trimmed */
  1536   1563       do {
  1537   1564   	Tcl_UniChar ch1;
  1538   1565   	const char *q = trim;
  1539   1566   	int bytesLeft = numTrim;
  1540   1567   
  1541   1568   	p = Tcl_UtfPrev(p, bytes);
................................................................................
  1559   1586   	    p += pInc;
  1560   1587   	    break;
  1561   1588   	}
  1562   1589       } while (p > bytes);
  1563   1590   
  1564   1591       return numBytes - (p - bytes);
  1565   1592   }
         1593  +
         1594  +int
         1595  +TclTrimRight(
         1596  +    const char *bytes,	/* String to be trimmed... */
         1597  +    int numBytes,	/* ...and its length in bytes */
         1598  +    const char *trim,	/* String of trim characters... */
         1599  +    int numTrim)	/* ...and its length in bytes */
         1600  +{
         1601  +    int res;
         1602  +    Tcl_DString bytesBuf, trimBuf;
         1603  +
         1604  +    /* Empty strings -> nothing to do */
         1605  +    if ((numBytes == 0) || (numTrim == 0)) {
         1606  +	return 0;
         1607  +    }
         1608  +
         1609  +    Tcl_DStringInit(&bytesBuf);
         1610  +    Tcl_DStringInit(&trimBuf);
         1611  +    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
         1612  +    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
         1613  +
         1614  +    res = TrimRight(bytes, numBytes, trim, numTrim);
         1615  +    if (res > numBytes) {
         1616  +	res = numBytes;
         1617  +    }
         1618  +
         1619  +    Tcl_DStringFree(&bytesBuf);
         1620  +    Tcl_DStringFree(&trimBuf);
         1621  +
         1622  +    return res;
         1623  +}
  1566   1624   
  1567   1625   /*
  1568   1626    *----------------------------------------------------------------------
  1569   1627    *
  1570   1628    * TclTrimLeft --
  1571         - *	Takes two counted strings in the Tcl encoding which must both be
  1572         - *	null terminated.  Conceptually trims from the left side of the
         1629  + *	Takes two counted strings in the Tcl encoding.  Conceptually
         1630  + *	finds the sub string (offset) to trim from the left side of the
  1573   1631    *	first string all characters found in the second string.
  1574   1632    *
  1575   1633    * Results:
  1576   1634    *	The number of bytes to be removed from the start of the string.
  1577   1635    *
  1578   1636    * Side effects:
  1579   1637    *	None.
  1580   1638    *
  1581   1639    *----------------------------------------------------------------------
  1582   1640    */
  1583   1641   
  1584         -int
  1585         -TclTrimLeft(
         1642  +static inline int
         1643  +TrimLeft(
  1586   1644       const char *bytes,	/* String to be trimmed... */
  1587   1645       int numBytes,	/* ...and its length in bytes */
  1588   1646       const char *trim,	/* String of trim characters... */
  1589   1647       int numTrim)	/* ...and its length in bytes */
  1590   1648   {
  1591   1649       const char *p = bytes;
  1592   1650   
  1593         -    if ((bytes[numBytes] != '\0') || (trim[numTrim] != '\0')) {
  1594         -	Tcl_Panic("TclTrimLeft works only on null-terminated strings");
  1595         -    }
  1596         -
  1597         -    /* Empty strings -> nothing to do */
  1598         -    if ((numBytes == 0) || (numTrim == 0)) {
  1599         -	return 0;
  1600         -    }
  1601         -
  1602   1651       /* Outer loop: iterate over string to be trimmed */
  1603   1652       do {
  1604   1653   	Tcl_UniChar ch1;
  1605   1654   	int pInc = TclUtfToUniChar(p, &ch1);
  1606   1655   	const char *q = trim;
  1607   1656   	int bytesLeft = numTrim;
  1608   1657   
................................................................................
  1622   1671   	if (bytesLeft == 0) {
  1623   1672   	    /* No match; trim task done; *p is first non-trimmed char */
  1624   1673   	    break;
  1625   1674   	}
  1626   1675   
  1627   1676   	p += pInc;
  1628   1677   	numBytes -= pInc;
  1629         -    } while (numBytes);
         1678  +    } while (numBytes > 0);
  1630   1679   
  1631   1680       return p - bytes;
  1632   1681   }
         1682  +
         1683  +int
         1684  +TclTrimLeft(
         1685  +    const char *bytes,	/* String to be trimmed... */
         1686  +    int numBytes,	/* ...and its length in bytes */
         1687  +    const char *trim,	/* String of trim characters... */
         1688  +    int numTrim)	/* ...and its length in bytes */
         1689  +{
         1690  +    int res;
         1691  +    Tcl_DString bytesBuf, trimBuf;
         1692  +
         1693  +    /* Empty strings -> nothing to do */
         1694  +    if ((numBytes == 0) || (numTrim == 0)) {
         1695  +	return 0;
         1696  +    }
         1697  +
         1698  +    Tcl_DStringInit(&bytesBuf);
         1699  +    Tcl_DStringInit(&trimBuf);
         1700  +    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
         1701  +    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
         1702  +
         1703  +    res = TrimLeft(bytes, numBytes, trim, numTrim);
         1704  +    if (res > numBytes) {
         1705  +	res = numBytes;
         1706  +    }
         1707  +
         1708  +    Tcl_DStringFree(&bytesBuf);
         1709  +    Tcl_DStringFree(&trimBuf);
         1710  +
         1711  +    return res;
         1712  +}
         1713  +
         1714  +/*
         1715  + *----------------------------------------------------------------------
         1716  + *
         1717  + * TclTrim --
         1718  + *	Finds the sub string (offset) to trim from both sides of the
         1719  + *	first string all characters found in the second string.
         1720  + *
         1721  + * Results:
         1722  + *	The number of bytes to be removed from the start of the string
         1723  + *
         1724  + * Side effects:
         1725  + *	None.
         1726  + *
         1727  + *----------------------------------------------------------------------
         1728  + */
         1729  +
         1730  +int
         1731  +TclTrim(
         1732  +    const char *bytes,	/* String to be trimmed... */
         1733  +    int numBytes,	/* ...and its length in bytes */
         1734  +    const char *trim,	/* String of trim characters... */
         1735  +    int numTrim,	/* ...and its length in bytes */
         1736  +    int *trimRight)		/* Offset from the end of the string. */
         1737  +{
         1738  +    int trimLeft;
         1739  +    Tcl_DString bytesBuf, trimBuf;
         1740  +
         1741  +    /* Empty strings -> nothing to do */
         1742  +    if ((numBytes == 0) || (numTrim == 0)) {
         1743  +	return 0;
         1744  +    }
         1745  +
         1746  +    Tcl_DStringInit(&bytesBuf);
         1747  +    Tcl_DStringInit(&trimBuf);
         1748  +    bytes = UtfWellFormedEnd(&bytesBuf, bytes, numBytes);
         1749  +    trim = UtfWellFormedEnd(&trimBuf, trim, numTrim);
         1750  +
         1751  +    trimLeft = TrimLeft(bytes, numBytes, trim, numTrim);
         1752  +    if (trimLeft > numBytes) {
         1753  +	trimLeft = numBytes;
         1754  +    }
         1755  +    numBytes -= trimLeft;
         1756  +    *trimRight = 0;
         1757  +    if (numBytes) {
         1758  +	bytes += trimLeft;
         1759  +	*trimRight = TrimRight(bytes, numBytes, trim, numTrim);
         1760  +	if (*trimRight > numBytes) {
         1761  +	    *trimRight = numBytes;
         1762  +	}
         1763  +    }
         1764  +
         1765  +    Tcl_DStringFree(&bytesBuf);
         1766  +    Tcl_DStringFree(&trimBuf);
         1767  +
         1768  +    return trimLeft;
         1769  +}
  1633   1770   
  1634   1771   /*
  1635   1772    *----------------------------------------------------------------------
  1636   1773    *
  1637   1774    * Tcl_Concat --
  1638   1775    *
  1639   1776    *	Concatenate a set of strings into a single large string.
................................................................................
  1683   1820   	 */
  1684   1821   	Tcl_Panic("Tcl_Concat: max size of Tcl value exceeded");
  1685   1822       }
  1686   1823       /* All element bytes + (argc - 1) spaces + 1 terminating NULL */
  1687   1824       result = (char *) ckalloc((unsigned) (bytesNeeded + argc));
  1688   1825   
  1689   1826       for (p = result, i = 0;  i < argc;  i++) {
  1690         -	int trim, elemLength;
         1827  +	int triml, trimr, elemLength;
  1691   1828   	const char *element;
  1692   1829   
  1693   1830   	element = argv[i];
  1694   1831   	elemLength = strlen(argv[i]);
  1695   1832   
  1696         -	/* Trim away the leading whitespace */
  1697         -	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
  1698         -	element += trim;
  1699         -	elemLength -= trim;
  1700         -
  1701         -	/*
  1702         -	 * Trim away the trailing whitespace.  Do not permit trimming
  1703         -	 * to expose a final backslash character.
  1704         -	 */
  1705         -
  1706         -	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
  1707         -	trim -= trim && (element[elemLength - trim - 1] == '\\');
  1708         -	elemLength -= trim;
         1833  +	/* Trim away the leading/trailing whitespace. */
         1834  +	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
         1835  +	element += triml;
         1836  +	elemLength -= triml + trimr;
         1837  +	/* Do not permit trimming to expose a final backslash character. */
         1838  +	elemLength += trimr && (element[elemLength - 1] == '\\');
  1709   1839   
  1710   1840   	/* If we're left with empty element after trimming, do nothing */
  1711   1841   	if (elemLength == 0) {
  1712   1842   	    continue;
  1713   1843   	}
  1714   1844   
  1715   1845   	/* Append to the result with space if needed */
................................................................................
  1828   1958        * string append algorithm.  When that fails it will report the error.
  1829   1959        */
  1830   1960       TclNewObj(resPtr);
  1831   1961       Tcl_AttemptSetObjLength(resPtr, bytesNeeded + objc - 1);
  1832   1962       Tcl_SetObjLength(resPtr, 0);
  1833   1963   
  1834   1964       for (i = 0;  i < objc;  i++) {
  1835         -	int trim;
         1965  +	int triml, trimr;
  1836   1966   
  1837   1967   	element = TclGetStringFromObj(objv[i], &elemLength);
  1838   1968   
  1839         -	/* Trim away the leading whitespace */
  1840         -	trim = TclTrimLeft(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
  1841         -	element += trim;
  1842         -	elemLength -= trim;
  1843         -
  1844         -	/*
  1845         -	 * Trim away the trailing whitespace.  Do not permit trimming
  1846         -	 * to expose a final backslash character.
  1847         -	 */
  1848         -
  1849         -	trim = TclTrimRight(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE);
  1850         -	trim -= trim && (element[elemLength - trim - 1] == '\\');
  1851         -	elemLength -= trim;
         1969  +	/* Trim away the leading/trailing whitespace. */
         1970  +	triml = TclTrim(element, elemLength, CONCAT_WS, CONCAT_WS_SIZE, &trimr);
         1971  +	element += triml;
         1972  +	elemLength -= triml + trimr;
         1973  +	/* Do not permit trimming to expose a final backslash character. */
         1974  +	elemLength += trimr && (element[elemLength - 1] == '\\');
  1852   1975   
  1853   1976   	/* If we're left with empty element after trimming, do nothing */
  1854   1977   	if (elemLength == 0) {
  1855   1978   	    continue;
  1856   1979   	}
  1857   1980   
  1858   1981   	/* Append to the result with space if needed */