Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Implement unicode toXXXX congruent to string toXXXX |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | tip-726-plus | tip-726-tclunidata-equivalence-proof |
Files: | files | file ages | folders |
SHA3-256: |
369ac8bf188d604a633ddfb9b14e384c |
User & Date: | apnadkarni 2025-07-25 17:34:27.996 |
Context
2025-07-25
| ||
17:46 | Eliminate use of tclUniData.c check-in: 1dcc7678c8 user: apnadkarni tags: tip-726-plus | |
17:34 | Implement unicode toXXXX congruent to string toXXXX check-in: 369ac8bf18 user: apnadkarni tags: tip-726-plus, tip-726-tclunidata-equivalence-proof | |
16:05 | Disable tests for bug 1ecea011 check-in: 13c2631e68 user: apnadkarni tags: tip-726-plus | |
Changes
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
5505 5506 5507 5508 5509 5510 5511 | /* * When entering here, result == 1 and failat == 0. */ switch (index) { case STR_IS_ALNUM: | | | | | | 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 | /* * When entering here, result == 1 and failat == 0. */ switch (index) { case STR_IS_ALNUM: int Tcl_UniCharIsAlnum(int ch); chcomp = Tcl_UniCharIsAlnum; break; case STR_IS_ALPHA: int Tcl_UniCharIsAlpha(int ch); chcomp = Tcl_UniCharIsAlpha; break; case STR_IS_ASCII: chcomp = UniCharIsAscii; break; case STR_IS_BOOL: case STR_IS_TRUE: case STR_IS_FALSE: |
︙ | ︙ | |||
5532 5533 5534 5535 5536 5537 5538 | } } else if ((objPtr->internalRep.wideValue != 0) ? (index == STR_IS_FALSE) : (index == STR_IS_TRUE)) { result = 0; } break; case STR_IS_CONTROL: | | | | 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 | } } else if ((objPtr->internalRep.wideValue != 0) ? (index == STR_IS_FALSE) : (index == STR_IS_TRUE)) { result = 0; } break; case STR_IS_CONTROL: int Tcl_UniCharIsControl(int ch); chcomp = Tcl_UniCharIsControl; break; case STR_IS_DICT: { int dresult; Tcl_Size dsize; dresult = Tcl_DictObjSize(interp, objPtr, &dsize); Tcl_ResetResult(interp); |
︙ | ︙ | |||
5585 5586 5587 5588 5589 5590 5591 | break; } } } break; } case STR_IS_DIGIT: | | | | 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 | break; } } } break; } case STR_IS_DIGIT: int Tcl_UniCharIsDigit(int ch); chcomp = Tcl_UniCharIsDigit; break; case STR_IS_DOUBLE: { if (TclHasInternalRep(objPtr, &tclDoubleType) || TclHasInternalRep(objPtr, &tclIntType) || TclHasInternalRep(objPtr, &tclBignumType)) { break; |
︙ | ︙ | |||
5617 5618 5619 5620 5621 5622 5623 | result = 0; TclFreeInternalRep(objPtr); } } break; } case STR_IS_GRAPH: | | | | 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 | result = 0; TclFreeInternalRep(objPtr); } } break; } case STR_IS_GRAPH: int Tcl_UniCharIsGraph(int ch); chcomp = Tcl_UniCharIsGraph; break; case STR_IS_INT: case STR_IS_ENTIER: if (TclHasInternalRep(objPtr, &tclIntType) || TclHasInternalRep(objPtr, &tclBignumType)) { break; } |
︙ | ︙ | |||
5769 5770 5771 5772 5773 5774 5775 | break; } } } result = 0; break; case STR_IS_LOWER: | | | | | | | | | | | | | | 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 | break; } } } result = 0; break; case STR_IS_LOWER: int Tcl_UniCharIsLower(int ch); chcomp = Tcl_UniCharIsLower; break; case STR_IS_PRINT: int Tcl_UniCharIsPrint(int ch); chcomp = Tcl_UniCharIsPrint; break; case STR_IS_PUNCT: int Tcl_UniCharIsPunct(int ch); chcomp = Tcl_UniCharIsPunct; break; case STR_IS_SPACE: int Tcl_UniCharIsSpace(int ch); chcomp = Tcl_UniCharIsSpace; break; case STR_IS_UPPER: int Tcl_UniCharIsUpper(int ch); chcomp = Tcl_UniCharIsUpper; break; case STR_IS_WORD: int Tcl_UniCharIsWordChar(int ch); chcomp = Tcl_UniCharIsWordChar; break; case STR_IS_XDIGIT: chcomp = UniCharIsHexDigit; break; default: TCL_UNREACHABLE(); } |
︙ | ︙ | |||
5873 5874 5875 5876 5877 5878 5879 | length2 = TclUtfToUniChar(string, &ucs4); Tcl_ListObjAppendElement(interp, categoriesObj, Tcl_NewStringObj(utf8proc_category_string(ucs4), -1)); } Tcl_SetObjResult(interp, categoriesObj); return TCL_OK; } | | | 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 | length2 = TclUtfToUniChar(string, &ucs4); Tcl_ListObjAppendElement(interp, categoriesObj, Tcl_NewStringObj(utf8proc_category_string(ucs4), -1)); } Tcl_SetObjResult(interp, categoriesObj); return TCL_OK; } /* * TclUnicodeNormalizeCmd -- * * This procedure implements the "unicode tonfc|tonfd|tonfkc|tonfkd" * commands. See the user documentation for details on what it does. * * Results: |
︙ | ︙ | |||
5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 | &ds) == NULL) { return TCL_ERROR; } Tcl_DStringResult(interp, &ds); return TCL_OK; } /* * TclInitUnicodeCmd -- * * This procedure creates the "unicode" Tcl ensemble command. See user * documentation for details on implemented commands. * * Results: * A standard Tcl result. * * Side effects: * Stores the result in the interpreter result. */ Tcl_Command TclInitUnicodeCmd( Tcl_Interp *interp) { static const EnsembleImplMap unicodeImplMap[] = { {"tonfc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFC, 0}, {"tonfd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFD, 0}, {"tonfkc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKC, 0}, {"tonfkd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKD, 0}, | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 | &ds) == NULL) { return TCL_ERROR; } Tcl_DStringResult(interp, &ds); return TCL_OK; } static int TclUnicodeToUpperCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Size length1, length2; const char *string1; char *string2; if (objc < 2 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_Utf8procUtfToUpper(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; length1 = Tcl_NumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; } last = first; if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, &last) != TCL_OK)) { return TCL_ERROR; } if (last >= length1) { last = length1; } if (last < first) { Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_Utf8procUtfToUpper(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; } static int TclUnicodeToLowerCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Size length1, length2; const char *string1; char *string2; if (objc < 2 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_Utf8procUtfToLower(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; length1 = Tcl_NumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; } last = first; if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, &last) != TCL_OK)) { return TCL_ERROR; } if (last >= length1) { last = length1; } if (last < first) { Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_Utf8procUtfToLower(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; } static int TclUnicodeToTitleCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Size length1, length2; const char *string1; char *string2; Tcl_Size Tcl_Utf8procToTitle(char *str); if (objc < 2 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); return TCL_ERROR; } string1 = TclGetStringFromObj(objv[1], &length1); if (objc == 2) { Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); length1 = Tcl_Utf8procUtfToTitle(TclGetString(resultPtr)); Tcl_SetObjLength(resultPtr, length1); Tcl_SetObjResult(interp, resultPtr); } else { Tcl_Size first, last; const char *start, *end; Tcl_Obj *resultPtr; length1 = Tcl_NumUtfChars(string1, length1) - 1; if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { return TCL_ERROR; } if (first < 0) { first = 0; } last = first; if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, &last) != TCL_OK)) { return TCL_ERROR; } if (last >= length1) { last = length1; } if (last < first) { Tcl_SetObjResult(interp, objv[1]); return TCL_OK; } string1 = TclGetStringFromObj(objv[1], &length1); start = Tcl_UtfAtIndex(string1, first); end = Tcl_UtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); length2 = Tcl_Utf8procUtfToTitle(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); Tcl_AppendToObj(resultPtr, end, -1); Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; } /* * TclInitUnicodeCmd -- * * This procedure creates the "unicode" Tcl ensemble command. See user * documentation for details on implemented commands. * * Results: * A standard Tcl result. * * Side effects: * Stores the result in the interpreter result. */ Tcl_Command TclInitUnicodeCmd( Tcl_Interp *interp) { static const EnsembleImplMap unicodeImplMap[] = { {"is", TclUnicodeIsCmd, NULL, NULL, NULL, 0}, {"category", TclUnicodeCategoryCmd, NULL, NULL, NULL, 0}, {"tolower", TclUnicodeToLowerCmd, NULL, NULL, NULL, 0}, {"tonfc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFC, 0}, {"tonfd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFD, 0}, {"tonfkc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKC, 0}, {"tonfkd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKD, 0}, {"totitle", TclUnicodeToTitleCmd, NULL, NULL, NULL, 0}, {"toupper", TclUnicodeToUpperCmd, NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0} }; return TclMakeEnsemble(interp, "unicode", unicodeImplMap); } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 | dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } *dst = '\0'; return (dst - str); } /* *---------------------------------------------------------------------- * * Tcl_UtfToLower -- * * Convert uppercase characters to lowercase characters in a UTF string | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 | dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } *dst = '\0'; return (dst - str); } Tcl_Size Tcl_Utf8procUtfToUpper( char *str) /* String to convert in place. */ { int ch, upChar; char *src, *dst; Tcl_Size len; /* * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); upChar = Tcl_Utf8procUniCharToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the upper case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(upChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(upChar, dst); } src += len; } *dst = '\0'; return (dst - str); } /* *---------------------------------------------------------------------- * * Tcl_UtfToLower -- * * Convert uppercase characters to lowercase characters in a UTF string |
︙ | ︙ | |||
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 | */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(lowChar)) { | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 | */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; } *dst = '\0'; return (dst - str); } Tcl_Size Tcl_Utf8procUtfToLower( char *str) /* String to convert in place. */ { int ch, lowChar; char *src, *dst; Tcl_Size len; /* * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = Tcl_Utf8procUniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the * conversion (thereby causing a segfault), only copy the lower case * char to dst if its size is <= the original char. */ if (len < TclUtfCount(lowChar)) { |
︙ | ︙ | |||
1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 | while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 | while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_UniCharToLower(lowChar); } if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += len; } *dst = '\0'; return (dst - str); } Tcl_Size Tcl_Utf8procUtfToTitle( char *str) /* String to convert in place. */ { int ch, titleChar, lowChar; char *src, *dst; Tcl_Size len; /* * Capitalize the first character and then lowercase the rest of the * characters until we get to a null. */ src = dst = str; if (*src) { len = TclUtfToUniChar(src, &ch); titleChar = Tcl_Utf8procUniCharToTitle(ch); if (len < TclUtfCount(titleChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += len; } while (*src) { len = TclUtfToUniChar(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { lowChar = Tcl_Utf8procUniCharToLower(lowChar); } if (len < TclUtfCount(lowChar)) { memmove(dst, src, len); dst += len; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } |
︙ | ︙ | |||
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 | if (GetCaseType(info) & 0x04) { ch -= GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- * * Tcl_UniCharToLower -- * * Compute the lowercase equivalent of the given Unicode character. | > > > > > > > > > > | 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 | if (GetCaseType(info) & 0x04) { ch -= GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } int Tcl_Utf8procUniCharToUpper( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_toupper(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- * * Tcl_UniCharToLower -- * * Compute the lowercase equivalent of the given Unicode character. |
︙ | ︙ | |||
1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 | if (!UNICODE_OUT_OF_RANGE(ch)) { int info = GetUniCharInfo(ch); int mode = GetCaseType(info); if ((mode & 0x02) && (mode != 0x7)) { ch += GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- | > > > > > > > > > > | 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 | if (!UNICODE_OUT_OF_RANGE(ch)) { int info = GetUniCharInfo(ch); int mode = GetCaseType(info); if ((mode & 0x02) && (mode != 0x7)) { ch += GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } int Tcl_Utf8procUniCharToLower( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_tolower(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- |
︙ | ︙ | |||
1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 | if (mode != 0x7) { ch += ((mode & 0x4) ? -1 : 1); } } else if (mode == 0x4) { ch -= GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- | > > > > > > > > > > | 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 | if (mode != 0x7) { ch += ((mode & 0x4) ? -1 : 1); } } else if (mode == 0x4) { ch -= GetDelta(info); } } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } int Tcl_Utf8procUniCharToTitle( int ch) /* Unicode character to convert. */ { if (!UNICODE_OUT_OF_RANGE(ch)) { ch = utf8proc_totitle(ch); } /* Clear away extension bits, if any */ return ch & 0x1FFFFF; } /* *---------------------------------------------------------------------- |
︙ | ︙ |
Changes to tests/unicodeNormalize.test.
︙ | ︙ | |||
37 38 39 40 41 42 43 | # Standard arg number tests test unicode-badargs-0 {unicode no args} -returnCodes error -body { unicode } -result {wrong # args: should be "unicode subcommand ?arg ...?"} test unicode-badargs-1 {unicode bad command} -returnCodes error -body { unicode foo | | | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | # Standard arg number tests test unicode-badargs-0 {unicode no args} -returnCodes error -body { unicode } -result {wrong # args: should be "unicode subcommand ?arg ...?"} test unicode-badargs-1 {unicode bad command} -returnCodes error -body { unicode foo } -result {unknown or ambiguous subcommand "foo": must be category, is, tonfc, tonfd, tonfkc, or tonfkd} variable cmd foreach cmd {tonfc tonfd tonfkc tonfkd} { test $cmd-badargs-0 "$cmd 0 args" -returnCodes error -body { unicode $cmd } -result "wrong # args: should be \"unicode $cmd ?-profile PROFILE? STRING\"" test $cmd-badargs-1 "$cmd 2 args" -returnCodes error -body { |
︙ | ︙ |
Changes to tests/unicodeProperties.test.
︙ | ︙ | |||
104 105 106 107 108 109 110 | # Compatibility tests between the string and unicode commands. proc testStringUnicodeCompatibility {class} { set mismatches "is $class mismatches:" foreach codePoint [lseq 0 $::tcltests::ucd::maxCodepoint] { set ch [format %c $codePoint] if {[string is $class $ch] != [unicode is $class $ch]} { append mismatches " " U+[format %x $codePoint] | < > > > > > > > > > > > > > > > > > | 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | # Compatibility tests between the string and unicode commands. proc testStringUnicodeCompatibility {class} { set mismatches "is $class mismatches:" foreach codePoint [lseq 0 $::tcltests::ucd::maxCodepoint] { set ch [format %c $codePoint] if {[string is $class $ch] != [unicode is $class $ch]} { append mismatches " " U+[format %x $codePoint] } } return $mismatches } foreach class {alpha alnum control digit graph lower print space upper wordchar} { test string-vs-unicode-is-$class "string is $class vs unicode" -body { testStringUnicodeCompatibility $class } -result "is $class mismatches:" } proc testStringUnicodeCaseConvertCompatibility {tocase} { set mismatches "$tocase mismatches:" foreach codePoint [lseq 0 $::tcltests::ucd::maxCodepoint] { set ch [format %c $codePoint] if {[string $tocase $ch] != [unicode $tocase $ch]} { append mismatches " " U+[format %x $codePoint] break } } return $mismatches } foreach tocase {tolower toupper totitle} { test string-vs-unicode-$tocase "string $tocase vs unicode" -body { testStringUnicodeCaseConvertCompatibility $tocase } -result "$tocase mismatches:" } } ::tcltest::cleanupTests namespace delete unicode::test return |