Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add C API Tcl_UtfToNormalized |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | tip-726 |
Files: | files | file ages | folders |
SHA3-256: |
b00ff15275e85fd520ea0a7213438d0a |
User & Date: | apnadkarni 2025-07-27 00:59:37.628 |
Context
2025-07-27
| ||
08:07 | Add tests Tcl_UtfToNormalized API check-in: 228a4cdf87 user: apnadkarni tags: tip-726 | |
00:59 | Add C API Tcl_UtfToNormalized check-in: b00ff15275 user: apnadkarni tags: tip-726 | |
2025-07-26
| ||
06:14 | Merge tip-726-plus to eliminate use of tclUniData.c. check-in: 0a1c3bc2fb user: apnadkarni tags: tip-726 | |
Changes
Changes to generic/tcl.decls.
︙ | ︙ | |||
2389 2390 2391 2392 2393 2394 2395 | declare 694 { int Tcl_ListObjRange(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr) } # TIP 726 declare 695 { | | > > > > > > | | 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 | declare 694 { int Tcl_ListObjRange(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr) } # TIP 726 declare 695 { int Tcl_UtfToNormalizedDString(Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, Tcl_DString *dsPtr) } declare 696 { int Tcl_UtfToNormalized(Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, char *bufPtr, Tcl_Size bufLen, Tcl_Size *lengthPtr) } # ----- BASELINE -- FOR -- 9.1.0 ----- # declare 697 { void TclUnusedStubEntry(void) } ############################################################################## # Define the platform specific public Tcl interface. These functions are only # available on the designated platform. |
︙ | ︙ |
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
5450 5451 5452 5453 5454 5455 5456 | Tcl_WrongNumArgs(interp, 1, objv, "?-profile PROFILE? STRING"); return TCL_ERROR; } Tcl_DString ds; if (Tcl_UtfToNormalizedDString(interp, Tcl_GetString(objv[objc - 1]), TCL_INDEX_NONE, (Tcl_UnicodeNormalizationForm)clientData, profile, | | | 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 | Tcl_WrongNumArgs(interp, 1, objv, "?-profile PROFILE? STRING"); return TCL_ERROR; } Tcl_DString ds; if (Tcl_UtfToNormalizedDString(interp, Tcl_GetString(objv[objc - 1]), TCL_INDEX_NONE, (Tcl_UnicodeNormalizationForm)clientData, profile, &ds) != TCL_OK) { return TCL_ERROR; } Tcl_DStringResult(interp, &ds); return TCL_OK; } |
︙ | ︙ |
Changes to generic/tclDecls.h.
︙ | ︙ | |||
1883 1884 1885 1886 1887 1888 1889 | Tcl_Obj *const objv[], Tcl_Obj **resultPtrPtr); /* 694 */ EXTERN int Tcl_ListObjRange(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr); /* 695 */ | | > > > > > > | 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 | Tcl_Obj *const objv[], Tcl_Obj **resultPtrPtr); /* 694 */ EXTERN int Tcl_ListObjRange(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr); /* 695 */ EXTERN int Tcl_UtfToNormalizedDString(Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, Tcl_DString *dsPtr); /* 696 */ EXTERN int Tcl_UtfToNormalized(Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, char *bufPtr, Tcl_Size bufLen, Tcl_Size *lengthPtr); /* 697 */ EXTERN void TclUnusedStubEntry(void); typedef struct { const struct TclPlatStubs *tclPlatStubs; const struct TclIntStubs *tclIntStubs; const struct TclIntPlatStubs *tclIntPlatStubs; } TclStubHooks; |
︙ | ︙ | |||
2595 2596 2597 2598 2599 2600 2601 | Tcl_Obj * (*tcl_NewWideUIntObj) (Tcl_WideUInt wideValue); /* 688 */ void (*tcl_SetWideUIntObj) (Tcl_Obj *objPtr, Tcl_WideUInt uwideValue); /* 689 */ int (*tcl_IsEmpty) (Tcl_Obj *obj); /* 690 */ const char * (*tcl_GetEncodingNameForUser) (Tcl_DString *bufPtr); /* 691 */ int (*tcl_ListObjReverse) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Obj **resultPtrPtr); /* 692 */ int (*tcl_ListObjRepeat) (Tcl_Interp *interp, Tcl_Size repeatCount, Tcl_Size objc, Tcl_Obj *const objv[], Tcl_Obj **resultPtrPtr); /* 693 */ int (*tcl_ListObjRange) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr); /* 694 */ | | > | | 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 | Tcl_Obj * (*tcl_NewWideUIntObj) (Tcl_WideUInt wideValue); /* 688 */ void (*tcl_SetWideUIntObj) (Tcl_Obj *objPtr, Tcl_WideUInt uwideValue); /* 689 */ int (*tcl_IsEmpty) (Tcl_Obj *obj); /* 690 */ const char * (*tcl_GetEncodingNameForUser) (Tcl_DString *bufPtr); /* 691 */ int (*tcl_ListObjReverse) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Obj **resultPtrPtr); /* 692 */ int (*tcl_ListObjRepeat) (Tcl_Interp *interp, Tcl_Size repeatCount, Tcl_Size objc, Tcl_Obj *const objv[], Tcl_Obj **resultPtrPtr); /* 693 */ int (*tcl_ListObjRange) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Size start, Tcl_Size end, Tcl_Obj **resultPtrPtr); /* 694 */ int (*tcl_UtfToNormalizedDString) (Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, Tcl_DString *dsPtr); /* 695 */ int (*tcl_UtfToNormalized) (Tcl_Interp *interp, const char *bytes, Tcl_Size length, Tcl_UnicodeNormalizationForm normForm, int profile, char *bufPtr, Tcl_Size bufLen, Tcl_Size *lengthPtr); /* 696 */ void (*tclUnusedStubEntry) (void); /* 697 */ } TclStubs; extern const TclStubs *tclStubsPtr; #ifdef __cplusplus } #endif |
︙ | ︙ | |||
3938 3939 3940 3941 3942 3943 3944 3945 | (tclStubsPtr->tcl_ListObjReverse) /* 692 */ #define Tcl_ListObjRepeat \ (tclStubsPtr->tcl_ListObjRepeat) /* 693 */ #define Tcl_ListObjRange \ (tclStubsPtr->tcl_ListObjRange) /* 694 */ #define Tcl_UtfToNormalizedDString \ (tclStubsPtr->tcl_UtfToNormalizedDString) /* 695 */ #define TclUnusedStubEntry \ | > > | | 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 | (tclStubsPtr->tcl_ListObjReverse) /* 692 */ #define Tcl_ListObjRepeat \ (tclStubsPtr->tcl_ListObjRepeat) /* 693 */ #define Tcl_ListObjRange \ (tclStubsPtr->tcl_ListObjRange) /* 694 */ #define Tcl_UtfToNormalizedDString \ (tclStubsPtr->tcl_UtfToNormalizedDString) /* 695 */ #define Tcl_UtfToNormalized \ (tclStubsPtr->tcl_UtfToNormalized) /* 696 */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 697 */ #endif /* defined(USE_TCL_STUBS) */ /* !END!: Do not edit above this line. */ #undef TclUnusedStubEntry |
︙ | ︙ |
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
4633 4634 4635 4636 4637 4638 4639 | objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { Tcl_ListObjAppendElement(interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, objPtr); } | | < < < < < < < < < < | | | | > | < < > < < > | | | > | > | | | < | > | < < < | > > | | > < | < < | < | > | | | | < < < < < < | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 | objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { Tcl_ListObjAppendElement(interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, objPtr); } static utf8proc_ssize_t TclUtfNormalize( Tcl_Interp *interp, /* Error messages. May be NULL */ const char *bytes, /* Operand encoded in Tcl internal UTF8 */ Tcl_Size numBytes, /* Length bytes[], or -1 if NUL terminated */ Tcl_Encoding encoding, /* Encoding - must be UTF-8. Caller passed for reuse */ Tcl_UnicodeNormalizationForm normForm, /* TCL_{NFC,NFD,NFKC,NFKC} */ int profile, /* TCL_ENCODING_PROFILE_{STRICT,REPLACE} */ utf8proc_uint8_t **bufPtrPtr) /* On success, output length excluding nul */ { if (profile != TCL_ENCODING_PROFILE_REPLACE && profile != TCL_ENCODING_PROFILE_STRICT) { if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("Invalid value %d passed for encoding profile.", profile)); } return -1; } utf8proc_option_t options = UTF8PROC_STABLE; switch (normForm) { case TCL_NFC: options |= UTF8PROC_COMPOSE; break; case TCL_NFD: options |= UTF8PROC_DECOMPOSE; break; case TCL_NFKC: options |= UTF8PROC_COMPOSE | UTF8PROC_COMPAT; break; case TCL_NFKD: options |= UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT; break; default: if (interp) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("Invalid value %d passed for normalization form.", normForm)); } return -1; } if (numBytes < 0) { numBytes = -1; } int result; Tcl_DString dsExt; result = Tcl_UtfToExternalDStringEx(interp, encoding, bytes, numBytes, profile, &dsExt, NULL); /* !!! dsExt needs to be freed even in case of error returns */ utf8proc_ssize_t normLength = -1; if (result == TCL_OK) { normLength = utf8proc_map_custom((utf8proc_uint8_t *)Tcl_DStringValue(&dsExt), Tcl_DStringLength(&dsExt), bufPtrPtr, options, NULL, NULL); if (normLength < 0) { if (interp) { const char *errorMsg = utf8proc_errmsg(normLength); Tcl_SetObjResult(interp, Tcl_NewStringObj( errorMsg ? errorMsg : "Unicode normalization failed.", -1)); } } } Tcl_DStringFree(&dsExt); return normLength; } /* * Tcl_UtfToNormalizedDString -- * * Converts the passed string to a Unicode normalization form storing * it in dsPtr. * * Results: * A standard Tcl error code. * * Side effects: * The output string is stored in dsPtr, which is initialized. */ int Tcl_UtfToNormalizedDString( Tcl_Interp *interp, /* Used for error messages. May be NULL */ const char *bytes, /* Operand encoded in Tcl internal UTF8 */ Tcl_Size numBytes, /* Length of bytes[], or -1 if NUL terminated */ Tcl_UnicodeNormalizationForm normForm, /* TCL_{NFC,NFD,NFKC,NFKC} */ int profile, /* TCL_ENCODING_PROFILE_{STRICT,REPLACE} */ Tcl_DString *dsPtr) /* Converted output string in Tcl internal UTF8 encoding. Init'ed by function */ { Tcl_DStringInit(dsPtr); Tcl_Encoding encoding = Tcl_GetEncoding(interp, "utf-8"); if (encoding == NULL) { return TCL_ERROR; } utf8proc_uint8_t *normUtf8; utf8proc_ssize_t normLength; normLength = TclUtfNormalize(interp, bytes, numBytes, encoding, normForm, profile, &normUtf8); if (normLength >= 0) { assert(normUtf8); /* Convert standard UTF8 to internal UTF8 */ int result = Tcl_ExternalToUtfDStringEx(interp, encoding, (const char *)normUtf8, normLength, profile, dsPtr, NULL); if (result != TCL_OK) { normLength = -1; } free(normUtf8); /* NOT Tcl_Free! */ } Tcl_FreeEncoding(encoding); return normLength >= 0 ? TCL_OK : TCL_ERROR; } /* * Tcl_UtfToNormalized -- * * Converts the passed string to a Unicode normalization form storing * it in the caller provided buffer. * * Results: * A standard Tcl error code. * * Side effects: * The output string is stored in bufPtr. */ int Tcl_UtfToNormalized( Tcl_Interp *interp, /* Used for error messages. May be NULL */ const char *bytes, /* Operand encoded in Tcl internal UTF8 */ Tcl_Size numBytes, /* Length of bytes[], or -1 if NUL terminated */ Tcl_UnicodeNormalizationForm normForm, /* TCL_{NFC,NFD,NFKC,NFKC} */ int profile, /* TCL_ENCODING_PROFILE_{STRICT,REPLACE} */ char *bufPtr, /* Pointer to output buffer. Must not be NULL. */ Tcl_Size bufLen, /* Size of bufPtr storage. */ Tcl_Size *lengthPtr) /* Length of the output string in bytes excluding the trailing NUL byte. May be NULL */ { Tcl_Encoding encoding = Tcl_GetEncoding(interp, "utf-8"); if (encoding == NULL) { return TCL_ERROR; } utf8proc_uint8_t *normUtf8; utf8proc_ssize_t normLength; normLength = TclUtfNormalize(interp, bytes, numBytes, encoding, normForm, profile, &normUtf8); Tcl_FreeEncoding(encoding); if (normLength < 0) { return TCL_ERROR; } assert(normUtf8); /* Convert standard UTF8 to internal UTF8 */ int result; utf8proc_uint8_t *from = normUtf8; utf8proc_uint8_t *fromEnd = from + normLength; char *to = (utf8proc_uint8_t *)bufPtr; char *toEnd = to + bufLen - 1; /* -1 for trailing NUL */ while (from < fromEnd && to < toEnd) { /* * The only difference between UTF-8 and internal UTF-8 is that * internal UTF-8 does not allow NUL bytes in the middle of the string. */ if (*from) { *to++ = *from++; } else { *to++ = 0xC0; /* NUL byte in internal UTF-8 is encoded as C080 */ if (to == toEnd) { break; } *to++ = 0x80; from++; } } if (from < fromEnd) { Tcl_SetObjResult(interp, Tcl_NewStringObj("Output buffer too small", -1)); result = TCL_CONVERT_NOSPACE; } else { assert(to < (bufPtr + bufLen - 1)); *to = '\0'; /* NUL terminate the output */ if (lengthPtr) { *lengthPtr = to-bufPtr; } result = TCL_OK; } free(normUtf8); /* NOT Tcl_Free! */ return result; } /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 * End: */ |
Changes to generic/tclStubInit.c.
︙ | ︙ | |||
1498 1499 1500 1501 1502 1503 1504 | Tcl_SetWideUIntObj, /* 689 */ Tcl_IsEmpty, /* 690 */ Tcl_GetEncodingNameForUser, /* 691 */ Tcl_ListObjReverse, /* 692 */ Tcl_ListObjRepeat, /* 693 */ Tcl_ListObjRange, /* 694 */ Tcl_UtfToNormalizedDString, /* 695 */ | > | | 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 | Tcl_SetWideUIntObj, /* 689 */ Tcl_IsEmpty, /* 690 */ Tcl_GetEncodingNameForUser, /* 691 */ Tcl_ListObjReverse, /* 692 */ Tcl_ListObjRepeat, /* 693 */ Tcl_ListObjRange, /* 694 */ Tcl_UtfToNormalizedDString, /* 695 */ Tcl_UtfToNormalized, /* 696 */ TclUnusedStubEntry, /* 697 */ }; /* !END!: Do not edit above this line. */ |
Changes to generic/tclTest.c.
︙ | ︙ | |||
322 323 324 325 326 327 328 | static Tcl_FSOpenFileChannelProc SimpleOpenFileChannel; static Tcl_FSListVolumesProc SimpleListVolumes; static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; | > | | 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | static Tcl_FSOpenFileChannelProc SimpleOpenFileChannel; static Tcl_FSListVolumesProc SimpleListVolumes; static Tcl_FSPathInFilesystemProc SimplePathInFilesystem; static Tcl_Obj * SimpleRedirect(Tcl_Obj *pathPtr); static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; static Tcl_ObjCmdProc TestUtfNextCmd; static Tcl_ObjCmdProc TestUtfPrevCmd; static Tcl_ObjCmdProc TestUtfToNormalizedDStringCmd; static Tcl_ObjCmdProc TestUtfToNormalizedCmd; static Tcl_ObjCmdProc TestNumUtfCharsCmd; static Tcl_ObjCmdProc TestGetUniCharCmd; static Tcl_ObjCmdProc TestFindFirstCmd; static Tcl_ObjCmdProc TestFindLastCmd; static Tcl_ObjCmdProc TestHashSystemHashCmd; static Tcl_ObjCmdProc TestGetIntForIndexCmd; static Tcl_ObjCmdProc TestLutilCmd; |
︙ | ︙ | |||
734 735 736 737 738 739 740 | NULL, NULL); Tcl_CreateObjCommand(interp, "testinterpresolver", TestInterpResolverCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testapplylambda", TestApplyLambdaCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testlutil", TestLutilCmd, NULL, NULL); | | | > > | 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 | NULL, NULL); Tcl_CreateObjCommand(interp, "testinterpresolver", TestInterpResolverCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testapplylambda", TestApplyLambdaCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testlutil", TestLutilCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testutftonormalized", TestUtfToNormalizedCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testutftonormalizeddstring", TestUtfToNormalizedDStringCmd, NULL, NULL); #if defined(_WIN32) Tcl_CreateObjCommand(interp, "testhandlecount", TestHandleCountCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testappverifierpresent", TestAppVerifierPresentCmd, NULL, NULL); #endif |
︙ | ︙ | |||
9026 9027 9028 9029 9030 9031 9032 | if (l2Obj) { Tcl_DecrRefCount(l2Obj); } return ret; } /* | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 | if (l2Obj) { Tcl_DecrRefCount(l2Obj); } return ret; } /* * TestUtfToNormalizedCmd -- * * This procedure implements the "testutftonormalized" command which * provides a raw interface to the Tcl_UtfToNormalized API. * objv[1] - input byte array encoded in Tcl internal UTF-8. Use * teststringbytes to construct. * objv[2] - normForm value to pass to Tcl_UtfToNormalized * objv[3] - profile value to pass to Tcl_UtfToNormalized * objv[4] - buffer length to pass to Tcl_UtfToNormalized. * * Results: * A standard Tcl result. * * Side effects: * The interpreter result is set to the raw bytes output of the * Tcl_UtfToNormalized call. * *---------------------------------------------------------------------- */ static int TestUtfToNormalizedCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Arguments. */ { if (objc != 5 && objc != 6) { Tcl_WrongNumArgs(interp, 1, objv, "BYTES NORMALFORM PROFILE ?LENGTH? BUFLENGTH"); return TCL_ERROR; } Tcl_Size bufLen, len, slen; unsigned char *s = Tcl_GetBytesFromObj(interp, objv[1], &slen); if (s == NULL) { return TCL_ERROR; } int normForm, profile; if (Tcl_GetIntFromObj(interp, objv[2], &normForm) != TCL_OK || Tcl_GetIntFromObj(interp, objv[3], &profile) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetSizeIntFromObj(interp, objv[objc-1], &bufLen) != TCL_OK) { return TCL_ERROR; } if (objc == 5) { len = slen; } else { if (Tcl_GetSizeIntFromObj(interp, objv[4], &len) != TCL_OK) { return TCL_ERROR; } if (len > slen) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "Passed length %" TCL_SIZE_MODIFIER "d is greater than string length %" TCL_SIZE_MODIFIER "d.", len, slen)); return TCL_ERROR; } } int result; char buffer[20] = {0x80}; char *bufPtr; Tcl_Size bufStored = 0; if (bufLen > sizeof(buffer)) { bufPtr = (char *)Tcl_Alloc(bufLen); } else { bufPtr = buffer; } result = Tcl_UtfToNormalized(interp, (char *) s, len, (Tcl_UnicodeNormalizationForm)normForm, profile, bufPtr, bufLen, &bufStored); if (result == TCL_OK) { /* Return as raw bytes, not string */ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(bufPtr, bufStored)); } if (bufPtr != buffer) { Tcl_Free(bufPtr); } return result; } /* * TestUtfToNormalizedDStringCmd -- * * This procedure implements the "testutftonormalizedstring" command which * provides a raw interface to the Tcl_UtfToNormalizedDString API. * objv[1] - input byte array encoded in Tcl internal UTF-8. Use * teststringbytes to construct. * objv[2] - normForm value to pass to Tcl_UtfToNormalizedDString * objv[3] - profile value to pass to Tcl_UtfToNormalizedDString * objv[4] - (optional) length to pass to Tcl_UtfToNormalizedDString. If * not present, length of objv[1] is used. * * Results: * A standard Tcl result. * * Side effects: * The interpreter result is set to the raw bytes output of the * Tcl_UtfToNormalizedDString call. * *---------------------------------------------------------------------- */ static int TestUtfToNormalizedDStringCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Arguments. */ { if (objc != 4 && objc != 5) { Tcl_WrongNumArgs(interp, 1, objv, "BYTES NORMALFORM PROFILE ?LENGTH?"); |
︙ | ︙ | |||
9069 9070 9071 9072 9073 9074 9075 | if (Tcl_GetIntFromObj(interp, objv[2], &normForm) != TCL_OK || Tcl_GetIntFromObj(interp, objv[3], &profile) != TCL_OK) { return TCL_ERROR; } if (objc == 4) { len = slen; } else { | | < > | | < < | | | > | > | | 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 | if (Tcl_GetIntFromObj(interp, objv[2], &normForm) != TCL_OK || Tcl_GetIntFromObj(interp, objv[3], &profile) != TCL_OK) { return TCL_ERROR; } if (objc == 4) { len = slen; } else { if (Tcl_GetSizeIntFromObj(interp, objv[5], &len) != TCL_OK) { return TCL_ERROR; } if (len > slen) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "Passed length %" TCL_SIZE_MODIFIER "d is greater than string length %" TCL_SIZE_MODIFIER "d.", len, slen)); return TCL_ERROR; } } Tcl_DString ds; int result; result = Tcl_UtfToNormalizedDString(interp, (char *) s, len, (Tcl_UnicodeNormalizationForm)normForm, profile, &ds); if (result == TCL_OK) { /* Return as raw bytes, not string */ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char *)Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); Tcl_DStringFree(&ds); } return result; } #ifdef _WIN32 /* *---------------------------------------------------------------------- * * TestHandleCountCmd -- |
︙ | ︙ |
Changes to tests/unicodeNormalize.test.
︙ | ︙ | |||
266 267 268 269 270 271 272 | lassign $testCase lineno chars nfc nfd nfkc nfkd set bytes [teststringbytes $chars] foreach profile {strict replace} { foreach normForm {nfc nfd nfkc nfkd} { test Tcl_UtfToNormalizedDString-$normForm-line-$lineno-$profile \ "Tcl_UtfToNormalizedDString for $normForm at line $lineno of $::tcltests::ucd::normalizationDataFile" \ -body { | > | > | | | | | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 | lassign $testCase lineno chars nfc nfd nfkc nfkd set bytes [teststringbytes $chars] foreach profile {strict replace} { foreach normForm {nfc nfd nfkc nfkd} { test Tcl_UtfToNormalizedDString-$normForm-line-$lineno-$profile \ "Tcl_UtfToNormalizedDString for $normForm at line $lineno of $::tcltests::ucd::normalizationDataFile" \ -body { testutftonormalizeddstring $bytes $normEnums($normForm) \ $profileFlags($profile) } -result [teststringbytes [set $normForm]] } } } foreach normForm {nfc nfd nfkc nfkd} { test Tcl_UtfToNormalizedDString-$normForm-nulchar-$profile \ "Tcl_UtfToNormalizedDString for $normForm passed nul character" \ -body { testutftonormalizeddstring [teststringbytes \0] \ $normEnums($normForm) $profileFlags($profile) } -result \xC0\x80 } # Tcl_UtfToNormalizedDString error cases foreach normForm {nfc nfd nfkc nfkd} { test Tcl_UtfToNormalizedDString-$normForm-tcl8 \ "Tcl_UtfToNormalizedDString for $normForm profile tcl8" \ -body { testutftonormalizeddstring abc $normEnums($normForm) $profileFlags(tcl8) } -result {Invalid value 16777216 passed for encoding profile.} -returnCodes error if {0} { # TODO - currently, Tcl "fixes up" any internal invalid UTF-8 so # no way to test normalization of invalid UTF-8. Enable this test # once this "fixing up" by Tcl is corrected (see Bug [b69e00ecf6]) test Tcl_UtfToNormalizedDString-$normForm-invalid-utf8 \ "Tcl_UtfToNormalizedDString for $normForm invalid utf8 profile strict" \ -body { testutftonormalizeddstring [testbytestring [binary decode hex EFBF7F]] $normEnums($normForm) $profileFlags(strict) } -result {} -returnCodes error } } test Tcl_UtfToNormalizedDString-invalid-normalization-form \ "Tcl_UtfToNormalizedDString invalid value for normalization form" \ -body { testutftonormalizeddstring abc 4 $profileFlags(strict) } -result {Invalid value 4 passed for normalization form.} -returnCodes error } ::tcltest::cleanupTests namespace delete unicode::test return |