Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Eliminate use of tclUniData.c |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | tip-726-plus |
Files: | files | file ages | folders |
SHA3-256: | 1dcc7678c8ba8d1782e84092a7508bd2 |
User & Date: | apnadkarni 2025-07-25 17:46:15.343 |
Context
2025-07-25
| ||
17:55 | Remove unicode toXXXX commands as no longer needed for confirming equivalence with string toXXXX check-in: 30a02828fc user: apnadkarni tags: tip-726-plus | |
17:46 | Eliminate use of tclUniData.c check-in: 1dcc7678c8 user: apnadkarni tags: tip-726-plus | |
17:34 | Implement unicode toXXXX congruent to string toXXXX check-in: 369ac8bf18 user: apnadkarni tags: tip-726-plus, tip-726-tclunidata-equivalence-proof | |
Changes
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
5952 5953 5954 5955 5956 5957 5958 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); | | | 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_UtfToUpper(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; |
︙ | ︙ | |||
5988 5989 5990 5991 5992 5993 5994 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); | | | 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToUpper(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; |
︙ | ︙ | |||
6019 6020 6021 6022 6023 6024 6025 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); | | | 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_UtfToLower(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; |
︙ | ︙ | |||
6055 6056 6057 6058 6059 6060 6061 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); | | | 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToLower(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; |
︙ | ︙ | |||
6087 6088 6089 6090 6091 6092 6093 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); | | | 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 | } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_UtfToTitle(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; |
︙ | ︙ | |||
6123 6124 6125 6126 6127 6128 6129 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); | | | 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 | string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_UtfToTitle(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; |
︙ | ︙ |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | #include "tclInt.h" #include "../utf8proc/utf8proc.h" /* * Include the static character classification tables and macros. */ | | > > > > > > | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #include "tclInt.h" #include "../utf8proc/utf8proc.h" /* * Include the static character classification tables and macros. */ #ifndef UNICODE_OUT_OF_RANGE # if TCL_UTF_MAX > 3 || TCL_MAJOR_VERSION > 8 || TCL_MINOR_VERSION > 6 # define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1FFFFF) >= 0x323C0) # else # define UNICODE_OUT_OF_RANGE(ch) (((ch) & 0x1F0000) != 0) # endif #endif /* * The following masks are used for fast character category tests. */ enum Utf8ProcCharacterCategoryMasks { UTF8PROC_ALPHA_BITS = (1 << UTF8PROC_CATEGORY_LU) | (1 << UTF8PROC_CATEGORY_LL) | (1 << UTF8PROC_CATEGORY_LT) | (1 << UTF8PROC_CATEGORY_LM) | (1 << UTF8PROC_CATEGORY_LO), UTF8PROC_CONTROL_BITS = |
︙ | ︙ | |||
1382 1383 1384 1385 1386 1387 1388 | dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } *dst = '\0'; return (dst - str); } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 | dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } *dst = '\0'; return (dst - str); } /* *---------------------------------------------------------------------- * * Tcl_UtfToLower -- * * Convert uppercase characters to lowercase characters in a UTF string |
︙ | ︙ | |||
1452 1453 1454 1455 1456 1457 1458 | */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 | */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(lowChar)) { |
︙ | ︙ | |||
1559 1560 1561 1562 1563 1564 1565 | len = TclUtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 | len = TclUtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; |
︙ | ︙ | |||
1925 1926 1927 1928 1929 1930 1931 | *---------------------------------------------------------------------- */ int Tcl_UniCharToUpper( int ch) /* Unicode character to convert. */ { | < < < < < < < < < < < < < < | 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 | *---------------------------------------------------------------------- */ int Tcl_UniCharToUpper( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_toupper(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } |
︙ | ︙ | |||
1966 1967 1968 1969 1970 1971 1972 | *---------------------------------------------------------------------- */ int Tcl_UniCharToLower( int ch) /* Unicode character to convert. */ { | < < < < < < < < < < < < < < < | 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 | *---------------------------------------------------------------------- */ int Tcl_UniCharToLower( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_tolower(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } |
︙ | ︙ | |||
2006 2007 2008 2009 2010 2011 2012 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharToTitle( | < < < < < < < < < < < < < < < < < < < < < < < | 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharToTitle( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_totitle(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; |
︙ | ︙ | |||
2199 2200 2201 2202 2203 2204 2205 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { | < < < < < < < < < | 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlnum( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & (UTF8PROC_ALPHA_BITS|UTF8PROC_DIGIT_BITS)) != 0; } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlpha -- |
︙ | ︙ | |||
2229 2230 2231 2232 2233 2234 2235 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlpha( | < < < < < < < < < | 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsAlpha( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & UTF8PROC_ALPHA_BITS) != 0; } /* *---------------------------------------------------------------------- |
︙ | ︙ | |||
2263 2264 2265 2266 2267 2268 2269 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { | < < < < < < < < < < < | 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsControl( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & UTF8PROC_CONTROL_BITS) != 0; } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsDigit -- |
︙ | ︙ | |||
2297 2298 2299 2300 2301 2302 2303 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { | < < < < < < < < < | 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsDigit( int ch) /* Unicode character to test. */ { return (utf8proc_category(ch) == UTF8PROC_CATEGORY_ND); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsGraph -- |
︙ | ︙ | |||
2327 2328 2329 2330 2331 2332 2333 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsGraph( | < < < < < < < < < | 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsGraph( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & UTF8PROC_GRAPH_BITS) != 0; } /* *---------------------------------------------------------------------- |
︙ | ︙ | |||
2361 2362 2363 2364 2365 2366 2367 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { | < < < < < < < < < | 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsLower( int ch) /* Unicode character to test. */ { return (utf8proc_category(ch) == UTF8PROC_CATEGORY_LL); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsPrint -- |
︙ | ︙ | |||
2391 2392 2393 2394 2395 2396 2397 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsPrint( | < < < < < < < < < | 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsPrint( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & (UTF8PROC_SPACE_BITS|UTF8PROC_GRAPH_BITS)) != 0; } /* *---------------------------------------------------------------------- |
︙ | ︙ | |||
2425 2426 2427 2428 2429 2430 2431 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { | < < < < < < < < < | 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsPunct( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & UTF8PROC_PUNCT_BITS) != 0; } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsSpace -- |
︙ | ︙ | |||
2455 2456 2457 2458 2459 2460 2461 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsSpace( | < < < < < < < < < < < < < < < < < < < < < < < | 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsSpace( int ch) /* Unicode character to test. */ { /* Ignore upper 11 bits. */ ch &= 0x1FFFFF; /* * If the character is within the first 127 characters, just use the |
︙ | ︙ | |||
2520 2521 2522 2523 2524 2525 2526 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { | < < < < < < < < < | 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 | *---------------------------------------------------------------------- */ int Tcl_UniCharIsUpper( int ch) /* Unicode character to test. */ { return (utf8proc_category(ch) == UTF8PROC_CATEGORY_LU); } /* *---------------------------------------------------------------------- * * Tcl_UniCharIsWordChar -- |
︙ | ︙ | |||
2550 2551 2552 2553 2554 2555 2556 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsWordChar( | < < < < < < < < < | 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 | * None. * *---------------------------------------------------------------------- */ int Tcl_UniCharIsWordChar( int ch) /* Unicode character to test. */ { return ((1 << utf8proc_category(ch)) & UTF8PROC_WORD_BITS) != 0; } /* *---------------------------------------------------------------------- |
︙ | ︙ |