Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix some indentation/wrapping for style |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | main |
Files: | files | file ages | folders |
SHA3-256: |
5b8a45dae948e114f63928f851c1a3b3 |
User & Date: | dkf 2024-06-12 20:24:40 |
Context
2024-06-13
| ||
09:51 | Use tabs for indenting instead of 8 spaces. Unbreak clang build check-in: 4bbba9e1f6 user: jan.nijtmans tags: trunk, main | |
2024-06-12
| ||
20:24 | Fix some indentation/wrapping for style check-in: 5b8a45dae9 user: dkf tags: trunk, main | |
20:12 | merge 8.7 (several conflicts resolved) check-in: 30f3c945a6 user: sebres tags: trunk, main | |
Changes
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
30 31 32 33 34 35 36 | * into UTF-8. */ Tcl_EncodingConvertProc *fromUtfProc; /* Function to convert from UTF-8 into * external encoding. */ Tcl_EncodingFreeProc *freeProc; /* If non-NULL, function to call when this * encoding is deleted. */ | | | | 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | * into UTF-8. */ Tcl_EncodingConvertProc *fromUtfProc; /* Function to convert from UTF-8 into * external encoding. */ Tcl_EncodingFreeProc *freeProc; /* If non-NULL, function to call when this * encoding is deleted. */ void *clientData; /* Arbitrary value associated with encoding * type. Passed to conversion functions. */ Tcl_Size nullSize; /* Number of 0x00 bytes that signify * end-of-string in this encoding. This number * is used to determine the source string * length when the srcLen argument is * negative. This number can be 1, 2, or 4. */ LengthProc *lengthProc; /* Function to compute length of * null-terminated strings in this encoding. * If nullSize is 1, this is strlen; if |
︙ | ︙ | |||
115 116 117 118 119 120 121 | * conversion. */ char prefixBytes[256]; /* If a byte in the input stream is the first * character of one of the escape sequences in * the following array, the corresponding * entry in this array is 1, otherwise it is * 0. */ int numSubTables; /* Length of following array. */ | | > | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | * conversion. */ char prefixBytes[256]; /* If a byte in the input stream is the first * character of one of the escape sequences in * the following array, the corresponding * entry in this array is 1, otherwise it is * 0. */ int numSubTables; /* Length of following array. */ EscapeSubTable subTables[TCLFLEXARRAY]; /* Information about each EscapeSubTable used * by this encoding type. The actual size is * as large as necessary to hold all * EscapeSubTables. */ } EscapeEncodingData; /* * Constants used when loading an encoding file to identify the type of the |
︙ | ︙ | |||
197 198 199 200 201 202 203 | int value; } encodingProfiles[] = { {"replace", TCL_ENCODING_PROFILE_REPLACE}, {"strict", TCL_ENCODING_PROFILE_STRICT}, {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; | | | | | | | | 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | int value; } encodingProfiles[] = { {"replace", TCL_ENCODING_PROFILE_REPLACE}, {"strict", TCL_ENCODING_PROFILE_STRICT}, {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_TCL8(flags_) \ (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8) #define PROFILE_REPLACE(flags_) \ (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define PROFILE_STRICT(flags_) \ (!PROFILE_TCL8(flags_) && !PROFILE_REPLACE(flags_)) #define UNICODE_REPLACE_CHAR 0xFFFD #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) #define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) #define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) /* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */ static unsigned short emptyPage[256]; |
︙ | ︙ | |||
255 256 257 258 259 260 261 | static Tcl_EncodingConvertProc UtfToUtf16Proc; static Tcl_EncodingConvertProc UtfToUcs2Proc; static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; /* | | | | | 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | static Tcl_EncodingConvertProc UtfToUtf16Proc; static Tcl_EncodingConvertProc UtfToUcs2Proc; static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; /* * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 * field of the internalrep. This should help the lifetime of encodings be more * useful. See concerns raised in [Bug 1077262]. */ static const Tcl_ObjType encodingType = { "encoding", FreeEncodingInternalRep, DupEncodingInternalRep, NULL, |
︙ | ︙ | |||
506 507 508 509 510 511 512 | /* * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this * when adding bits. TODO - should really be defined in a single file. * * To prevent conflicting bits, only define bits within 0xff00 mask here. */ | > | | > | > > | 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 | /* * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this * when adding bits. TODO - should really be defined in a single file. * * To prevent conflicting bits, only define bits within 0xff00 mask here. */ enum InternalEncodingFlags { TCL_ENCODING_LE = 0x100, /* Used to distinguish LE/BE variants */ ENCODING_UTF = 0x200, /* For UTF-8 encoding, allow 4-byte output * sequences */ ENCODING_INPUT = 0x400 /* For UTF-8/CESU-8 encoding, means * external -> internal */ }; void TclInitEncodingSubsystem(void) { Tcl_EncodingType type; TableEncodingData *dataPtr; unsigned size; |
︙ | ︙ | |||
561 562 563 564 565 566 567 | type.clientData = INT2PTR(ENCODING_UTF); tclUtf8Encoding = Tcl_CreateEncoding(&type); type.clientData = NULL; type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; | | | | | | | | | | | | | | 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 | type.clientData = INT2PTR(ENCODING_UTF); tclUtf8Encoding = Tcl_CreateEncoding(&type); type.clientData = NULL; type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; type.fromUtfProc = UtfToUtf32Proc; type.freeProc = NULL; type.nullSize = 4; type.encodingName = "utf-32le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-32be"; type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "utf-32"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED type.encodingName = "unicode"; Tcl_CreateEncoding(&type); #endif /* * Need the iso8859-1 encoding in order to process binary data, so force * it to always be 
embedded. Note that this encoding *must* be a proper * table encoding or some of the escape encodings crash! Hence the ugly |
︙ | ︙ | |||
920 921 922 923 924 925 926 | /* *------------------------------------------------------------------------- * * Tcl_GetEncodingNulLength -- * * Given an encoding, return the number of nul bytes used for the | | | 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 | /* *------------------------------------------------------------------------- * * Tcl_GetEncodingNulLength -- * * Given an encoding, return the number of nul bytes used for the * string termination. * * Results: * The number of nul bytes used for the string termination. * * Side effects: * None. * |
︙ | ︙ | |||
1120 1121 1122 1123 1124 1125 1126 | * "flags" controls the behavior if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * * Results: | | | | | | | | < | | | | | | | | | | | | | | 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 | * "flags" controls the behavior if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * * Results: * The return value is one of * TCL_OK: success. Converted string in *dstPtr * TCL_ERROR: error in passed parameters. Error message in interp * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition * TCL_CONVERT_UNKNOWN: source contained a character that could not * be represented in target encoding. * * Side effects: * TCL_OK: The converted bytes are stored in the DString and NUL * terminated in an encoding-specific manner. * TCL_ERROR: an error, message is stored in the interp if not NULL. * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored * in the interpreter (if not NULL). If errorLocPtr is not NULL, * no error message is stored as it is expected the caller is * interested in whatever is decoded so far and not treating this * as an error condition. * * In addition, *dstPtr is always initialized and must be cleared * by the caller irrespective of the return code. 
* *------------------------------------------------------------------------- */ int Tcl_ExternalToUtfDStringEx( Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ Tcl_Size *errorLocPtr) /* Where to store the error location * (or TCL_INDEX_NONE if no error). May * be NULL. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int result; Tcl_Size dstLen, soFar; |
︙ | ︙ | |||
1227 1228 1229 1230 1231 1232 1233 | Tcl_DStringSetLength(dstPtr, soFar); if (errorLocPtr) { /* * Do not write error message into interpreter if caller * wants to know error location. */ | | > | > | > | 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 | Tcl_DStringSetLength(dstPtr, soFar); if (errorLocPtr) { /* * Do not write error message into interpreter if caller * wants to know error location. */ *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { char buf[TCL_INTEGER_SPACE]; snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", nBytesProcessed); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "unexpected byte sequence starting at index %" TCL_SIZE_MODIFIER "d: '\\x%02X'", nBytesProcessed, UCHAR(srcStart[nBytesProcessed]))); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, (void *)NULL); } } if (result != TCL_OK) { errno = (result == TCL_CONVERT_NOSPACE) ? ENOMEM : EILSEQ; } return result; } |
︙ | ︙ | |||
1283 1284 1285 1286 1287 1288 1289 | int Tcl_ExternalToUtf( TCL_UNUSED(Tcl_Interp *), /* TODO: Re-examine this. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ | | | > | 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 | int Tcl_ExternalToUtf( TCL_UNUSED(Tcl_Interp *), /* TODO: Re-examine this. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ Tcl_Size srcLen, /* Source string length in bytes, or * TCL_INDEX_NONE for encoding-specific string * length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string is |
︙ | ︙ | |||
1436 1437 1438 1439 1440 1441 1442 | * Convert a source buffer from UTF-8 to the specified encoding. * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE_* * * Results: | | | | | | | | < | | | | | | | | | | | | | | 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 | * Convert a source buffer from UTF-8 to the specified encoding. * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE_* * * Results: * The return value is one of * TCL_OK: success. Converted string in *dstPtr * TCL_ERROR: error in passed parameters. Error message in interp * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition * TCL_CONVERT_UNKNOWN: source contained a character that could not * be represented in target encoding. * * Side effects: * TCL_OK: The converted bytes are stored in the DString and NUL * terminated in an encoding-specific manner * TCL_ERROR: an error, message is stored in the interp if not NULL. * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored * in the interpreter (if not NULL). If errorLocPtr is not NULL, * no error message is stored as it is expected the caller is * interested in whatever is decoded so far and not treating this * as an error condition. * * In addition, *dstPtr is always initialized and must be cleared * by the caller irrespective of the return code. 
* *------------------------------------------------------------------------- */ int Tcl_UtfToExternalDStringEx( Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ Tcl_Size *errorLocPtr) /* Where to store the error location * (or TCL_INDEX_NONE if no error). May * be NULL. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int result; const char *srcStart = src; |
︙ | ︙ | |||
1543 1544 1545 1546 1547 1548 1549 | Tcl_DStringSetLength(dstPtr, i--); } if (errorLocPtr) { /* * Do not write error message into interpreter if caller * wants to know error location. */ | | > | > | 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 | Tcl_DStringSetLength(dstPtr, i--); } if (errorLocPtr) { /* * Do not write error message into interpreter if caller * wants to know error location. */ *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); int ucs4; char buf[TCL_INTEGER_SPACE]; TclUtfToUniChar(&srcStart[nBytesProcessed], &ucs4); snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", nBytesProcessed); Tcl_SetObjResult(interp, Tcl_ObjPrintf( "unexpected character at index %" TCL_SIZE_MODIFIER "u: 'U+%06X'", pos, ucs4)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, (void *)NULL); } |
︙ | ︙ | |||
1603 1604 1605 1606 1607 1608 1609 | int Tcl_UtfToExternal( TCL_UNUSED(Tcl_Interp *), /* TODO: Re-examine this. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ | | | | 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 | int Tcl_UtfToExternal( TCL_UNUSED(Tcl_Interp *), /* TODO: Re-examine this. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ Tcl_Size srcLen, /* Source string length in bytes, or * TCL_INDEX_NONE for strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string |
︙ | ︙ | |||
1812 1813 1814 1815 1816 1817 1818 | TclSetProcessGlobalValue(&encodingFileMap, map); } } if ((NULL == chan) && (interp != NULL)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "unknown encoding \"%s\"", name)); | | > | 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 | TclSetProcessGlobalValue(&encodingFileMap, map); } } if ((NULL == chan) && (interp != NULL)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "unknown encoding \"%s\"", name)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, (void *)NULL); } Tcl_DecrRefCount(fileNameObj); Tcl_DecrRefCount(searchPath); return chan; } |
︙ | ︙ | |||
1886 1887 1888 1889 1890 1891 1892 | case 'E': encoding = LoadEscapeEncoding(name, chan); break; } if ((encoding == NULL) && (interp != NULL)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid encoding file \"%s\"", name)); | | > | 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 | case 'E': encoding = LoadEscapeEncoding(name, chan); break; } if ((encoding == NULL) && (interp != NULL)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid encoding file \"%s\"", name)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, (void *)NULL); } Tcl_CloseEx(NULL, chan, 0); return encoding; } /* |
︙ | ︙ | |||
2276 2277 2278 2279 2280 2281 2282 | /* * To avoid infinite recursion in [encoding system iso2022-*] */ e = (Encoding *) Tcl_GetEncoding(NULL, est.name); if ((e != NULL) && (e->toUtfProc != TableToUtfProc) && (e->toUtfProc != Iso88591ToUtfProc)) { | | | | 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 | /* * To avoid infinite recursion in [encoding system iso2022-*] */ e = (Encoding *) Tcl_GetEncoding(NULL, est.name); if ((e != NULL) && (e->toUtfProc != TableToUtfProc) && (e->toUtfProc != Iso88591ToUtfProc)) { Tcl_FreeEncoding((Tcl_Encoding) e); e = NULL; } est.encodingPtr = e; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } } Tcl_Free(argv); Tcl_DStringFree(&lineString); |
︙ | ︙ | |||
2464 2465 2466 2467 2468 2469 2470 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } | > | | | | | | | 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 | result = TCL_CONVERT_MULTIBYTE; break; } if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to \xC0\x80. */ *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && (UCHAR(src[1]) == 0x80) && (!(flags & ENCODING_INPUT) || !PROFILE_TCL8(profile))) { /* Special sequence \xC0\x80 */ if (!PROFILE_TCL8(profile) && (flags & ENCODING_INPUT)) { if (PROFILE_REPLACE(profile)) { dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); src += 2; } else { /* PROFILE_STRICT */ result = TCL_CONVERT_SYNTAX; break; } } else { /* * Convert 0xC080 to real nulls when we are in output mode, * irrespective of the profile. */ *dst++ = 0; |
︙ | ︙ | |||
2505 2506 2507 2508 2509 2510 2511 | * the user has explicitly asked to be told. */ if (flags & ENCODING_INPUT) { /* Incomplete bytes for modified UTF-8 target */ if (PROFILE_STRICT(profile)) { result = (flags & TCL_ENCODING_CHAR_LIMIT) | | | | > | > | > | 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 | * the user has explicitly asked to be told. */ if (flags & ENCODING_INPUT) { /* Incomplete bytes for modified UTF-8 target */ if (PROFILE_STRICT(profile)) { result = (flags & TCL_ENCODING_CHAR_LIMIT) ? TCL_CONVERT_MULTIBYTE : TCL_CONVERT_SYNTAX; break; } } if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; ++src; } else { /* TCL_ENCODING_PROFILE_TCL8 */ char chbuf[2]; chbuf[0] = UCHAR(*src++); chbuf[1] = 0; TclUtfToUniChar(chbuf, &ch); } dst += Tcl_UniCharToUtf(ch, dst); } else { size_t len = TclUtfToUniChar(src, &ch); if (flags & ENCODING_INPUT) { if (((len < 2) && (ch != 0)) || ((ch > 0xFFFF) && !(flags & ENCODING_UTF))) { if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_SYNTAX; break; } else if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } } } const char *saveSrc = src; src += len; if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; *dst++ = 0xED; *dst++ = (char) (((ch >> 16) & 0x0F) | 0xA0); *dst++ = (char) (((ch >> 10) & 0x3F) | 0x80); ch = (ch & 0x0CFF) | 0xDC00; } *dst++ = (char)(((ch >> 12) | 0xE0) & 0xEF); *dst++ = (char)(((ch >> 6) | 0x80) & 0xBF); *dst++ = (char)((ch | 0x80) & 0xBF); continue; } else if (SURROGATE(ch)) { if (PROFILE_STRICT(profile)) { result = (flags & ENCODING_INPUT) ? 
TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; src = saveSrc; break; } else if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } } dst += Tcl_UniCharToUtf(ch, dst); |
︙ | ︙ | |||
2585 2586 2587 2588 2589 2590 2591 | * None. * *------------------------------------------------------------------------- */ static int Utf32ToUtfProc( | | | 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 | * None. * *------------------------------------------------------------------------- */ static int Utf32ToUtfProc( void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
2639 2640 2641 2642 2643 2644 2645 | for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (flags & TCL_ENCODING_LE) { | | > | > | 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 | for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } if (flags & TCL_ENCODING_LE) { ch = (unsigned int)(src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (unsigned int)(src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } if ((unsigned)ch > 0x10FFFF) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } ch = UNICODE_REPLACE_CHAR; |
︙ | ︙ | |||
2714 2715 2716 2717 2718 2719 2720 | * None. * *------------------------------------------------------------------------- */ static int UtfToUtf32Proc( | | | 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 | * None. * *------------------------------------------------------------------------- */ static int UtfToUtf32Proc( void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
2813 2814 2815 2816 2817 2818 2819 | * None. * *------------------------------------------------------------------------- */ static int Utf16ToUtfProc( | | | 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 | * None. * *------------------------------------------------------------------------- */ static int Utf16ToUtfProc( void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
2871 2872 2873 2874 2875 2876 2877 | srcStart = src; srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; | | > | | | | > | > > | 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 | srcStart = src; srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; for (numChars = 0; src < srcEnd && numChars <= charLimit; src += 2, numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } unsigned short prev = ch; if (flags & TCL_ENCODING_LE) { ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; } else if (PROFILE_REPLACE(flags)) { /* * Previous loop wrote a single byte to mark the high surrogate. * Replace it with the replacement character. Further, restart * current loop iteration since need to recheck destination * space and reset processing of current character. */ ch = UNICODE_REPLACE_CHAR; dst--; dst += Tcl_UniCharToUtf(ch, dst); src -= 2; numChars--; continue; } else { /* * Bug [10c2c17c32]. If Hi surrogate not followed by Lo * surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } } /* * Special case for 1-byte utf chars for speed. Make sure we work with * unsigned short-size data. |
︙ | ︙ | |||
2991 2992 2993 2994 2995 2996 2997 | * None. * *------------------------------------------------------------------------- */ static int UtfToUtf16Proc( | | | 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 | * None. * *------------------------------------------------------------------------- */ static int UtfToUtf16Proc( void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
3099 3100 3101 3102 3103 3104 3105 | * None. * *------------------------------------------------------------------------- */ static int UtfToUcs2Proc( | | | 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 | * None. * *------------------------------------------------------------------------- */ static int UtfToUcs2Proc( void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ int dstLen, /* The maximum length of output buffer in |
︙ | ︙ | |||
3203 3204 3205 3206 3207 3208 3209 | * None. * *------------------------------------------------------------------------- */ static int TableToUtfProc( | | | 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 | * None. * *------------------------------------------------------------------------- */ static int TableToUtfProc( void *clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ |
︙ | ︙ | |||
3266 3267 3268 3269 3270 3271 3272 | break; } else if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } else if (PROFILE_REPLACE(flags)) { ch = UNICODE_REPLACE_CHAR; } else { | | > | 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 | break; } else if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } else if (PROFILE_REPLACE(flags)) { ch = UNICODE_REPLACE_CHAR; } else { /* For prefix bytes, we don't fallback to cp1252, see * [1355b9a874] */ ch = byte; } } else { ch = toUnicode[byte][*((unsigned char *)++src)]; } } else { ch = pageZero[byte]; |
︙ | ︙ | |||
3331 3332 3333 3334 3335 3336 3337 | * None. * *------------------------------------------------------------------------- */ static int TableFromUtfProc( | | | 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 | * None. * *------------------------------------------------------------------------- */ static int TableFromUtfProc( void *clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ |
︙ | ︙ | |||
3623 3624 3625 3626 3627 3628 3629 | * Memory freed. * *--------------------------------------------------------------------------- */ static void TableFreeProc( | | | 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 | * Memory freed. * *--------------------------------------------------------------------------- */ static void TableFreeProc( void *clientData) /* TableEncodingData that specifies * encoding. */ { TableEncodingData *dataPtr = (TableEncodingData *)clientData; /* * Make sure we aren't freeing twice on shutdown. [Bug 219314] */ |
︙ | ︙ | |||
3658 3659 3660 3661 3662 3663 3664 | * None. * *------------------------------------------------------------------------- */ static int EscapeToUtfProc( | | | 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 | * None. * *------------------------------------------------------------------------- */ static int EscapeToUtfProc( void *clientData, /* EscapeEncodingData that specifies * encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are |
︙ | ︙ | |||
3871 3872 3873 3874 3875 3876 3877 | * None. * *------------------------------------------------------------------------- */ static int EscapeFromUtfProc( | | | 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 | * None. * *------------------------------------------------------------------------- */ static int EscapeFromUtfProc( void *clientData, /* EscapeEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise * conversion. Contents of statePtr are |
︙ | ︙ | |||
3938 3939 3940 3941 3942 3943 3944 | memcpy(dst, dataPtr->init, dataPtr->initLen); dst += dataPtr->initLen; } else { state = PTR2INT(*statePtr); } encodingPtr = GetTableEncoding(dataPtr, state); | | | 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 | memcpy(dst, dataPtr->init, dataPtr->initLen); dst += dataPtr->initLen; } else { state = PTR2INT(*statePtr); } encodingPtr = GetTableEncoding(dataPtr, state); tableDataPtr = (TableEncodingData *) encodingPtr->clientData; tablePrefixBytes = tableDataPtr->prefixBytes; tableFromUnicode = (const unsigned short *const *) tableDataPtr->fromUnicode; for (numChars = 0; src < srcEnd; numChars++) { unsigned len; int word; |
︙ | ︙ | |||
3966 3967 3968 3969 3970 3971 3972 | if ((word == 0) && (ch != 0)) { int oldState; const EscapeSubTable *subTablePtr; oldState = state; for (state = 0; state < dataPtr->numSubTables; state++) { encodingPtr = GetTableEncoding(dataPtr, state); | | | | 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 | if ((word == 0) && (ch != 0)) { int oldState; const EscapeSubTable *subTablePtr; oldState = state; for (state = 0; state < dataPtr->numSubTables; state++) { encodingPtr = GetTableEncoding(dataPtr, state); tableDataPtr = (TableEncodingData *) encodingPtr->clientData; word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xFF]; if (word != 0) { break; } } if (word == 0) { state = oldState; if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } encodingPtr = GetTableEncoding(dataPtr, state); tableDataPtr = (TableEncodingData *) encodingPtr->clientData; word = tableDataPtr->fallback; } tablePrefixBytes = (const char *) tableDataPtr->prefixBytes; tableFromUnicode = (const unsigned short *const *) tableDataPtr->fromUnicode; |
︙ | ︙ | |||
4082 4083 4084 4085 4086 4087 4088 | * Memory is freed. * *--------------------------------------------------------------------------- */ static void EscapeFreeProc( | | | 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 | * Memory is freed. * *--------------------------------------------------------------------------- */ static void EscapeFreeProc( void *clientData) /* EscapeEncodingData that specifies * encoding. */ { EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData; EscapeSubTable *subTablePtr; int i; if (dataPtr == NULL) { |
︙ | ︙ | |||
4304 4305 4306 4307 4308 4309 4310 | } if (interp) { /* This code assumes at least two profiles :-) */ Tcl_Obj *errorObj = Tcl_ObjPrintf("bad profile name \"%s\": must be", profileName); for (i = 0; i < (numProfiles - 1); ++i) { Tcl_AppendStringsToObj( | | > | > | > | 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 | } if (interp) { /* This code assumes at least two profiles :-) */ Tcl_Obj *errorObj = Tcl_ObjPrintf("bad profile name \"%s\": must be", profileName); for (i = 0; i < (numProfiles - 1); ++i) { Tcl_AppendStringsToObj( errorObj, " ", encodingProfiles[i].name, ",", (void *)NULL); } Tcl_AppendStringsToObj( errorObj, " or ", encodingProfiles[numProfiles-1].name, (void *)NULL); Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "PROFILE", profileName, (void *)NULL); } return TCL_ERROR; } /* *------------------------------------------------------------------------ * |
︙ | ︙ | |||
4338 4339 4340 4341 4342 4343 4344 | const char * TclEncodingProfileIdToName( Tcl_Interp *interp, /* For error messages. May be NULL */ int profileValue) /* Profile #define value */ { size_t i; | | > | | 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 | const char * TclEncodingProfileIdToName( Tcl_Interp *interp, /* For error messages. May be NULL */ int profileValue) /* Profile #define value */ { size_t i; for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { if (profileValue == encodingProfiles[i].value) { return encodingProfiles[i].name; } } if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "Internal error. Bad profile id \"%d\".", profileValue)); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "PROFILEID", (void *)NULL); } return NULL; } /* *------------------------------------------------------------------------ * |
︙ | ︙ | |||
4377 4378 4379 4380 4381 4382 4383 | Tcl_Interp *interp) { size_t i, n; Tcl_Obj *objPtr; n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { | | | | | 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 | Tcl_Interp *interp) { size_t i, n; Tcl_Obj *objPtr; n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { Tcl_ListObjAppendElement(interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, objPtr); } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ |