Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix issue [8f7fdea2d], string-2.20.1 fails on big endian, and also fix issues in TclStringCmp when checkEq is 1. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core-8-6-branch |
Files: | files | file ages | folders |
SHA3-256: |
9858a68816f2053a9fbf94ecd6a6b8a3 |
User & Date: | pooryorick 2023-01-15 19:26:36.697 |
Original Comment: | Fix issues in TclStringCmp when checkEq is 1, discovered with the help of rmax while diagnosing [8f7fdea2d]. |
References
2023-01-15
| ||
19:31 | • Ticket [8f7fdea2df] string-2.20.1 fails on big endian status still Open with 4 other changes artifact: 81332e4d39 user: pooryorick | |
Context
2023-01-22
| ||
01:33 | Bug [e3dcab1d14] fix check-in: 783bacf316 user: kbk tags: core-8-6-branch | |
2023-01-19
| ||
21:36 | Proposed fix for [3e8074aea7]: [interp limit time -seconds] has a y2k38 problem Closed-Leaf check-in: 504171c4e4 user: jan.nijtmans tags: bug-3e8074aea7 | |
2023-01-18
| ||
16:54 | Merge 8.6 check-in: 6dac80a981 user: jan.nijtmans tags: core-8-branch | |
2023-01-15
| ||
19:26 | Fix issue [8f7fdea2d], string-2.20.1 fails on big endian, and also fix issues in TclStringCmp when ... check-in: 9858a68816 user: pooryorick tags: core-8-6-branch | |
2023-01-14
| ||
22:07 | Properly quote contents of Make variables to pass through gdb.run file. check-in: 075c79a718 user: pooryorick tags: core-8-6-branch | |
Changes
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
2625 2626 2627 2628 2629 2630 2631 | /* * From now on, we only access the two objects at the end of the argument * array. */ objv += objc-2; | | | 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 | /* * From now on, we only access the two objects at the end of the argument * array. */ objv += objc-2; match = TclStringCmp(objv[0], objv[1], 1, nocase, reqlength); Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); return TCL_OK; } /* *---------------------------------------------------------------------- * |
︙ | ︙ | |||
2698 2699 2700 2701 2702 2703 2704 | int TclStringCmp( Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ | | | | 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 | int TclStringCmp( Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ int reqlength) /* requested length in characters; -1 to * compare whole strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; memCmpFn_t memCmpFn; if ((reqlength == 0) || (value1Ptr == value2Ptr)) { /* |
︙ | ︙ | |||
2727 2728 2729 2730 2731 2732 2733 | s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* | | | > > > | | | > > > | 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 | s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* * Do a Unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the * Unicode and string comparison operations. */ if (nocase) { s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { /* each byte represents one character so s1l3n, s2l3n, and * reqlength are in both bytes and characters */ s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { s1 = (char *) Tcl_GetUnicode(value1Ptr); s2 = (char *) Tcl_GetUnicode(value2Ptr); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4) 1 #else checkEq #endif /* WORDS_BIGENDIAN */ ) { memCmpFn = memcmp; s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); if (reqlength > 0) { reqlength *= sizeof(Tcl_UniChar); } } else { memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; } } } } else { /* |
︙ | ︙ | |||
2801 2802 2803 2804 2805 2806 2807 | return 0; } } else { s1 = TclGetStringFromObj(value1Ptr, &s1len); s2 = TclGetStringFromObj(value2Ptr, &s2len); } | | | > > > > > | | < | | 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 | return 0; } } else { s1 = TclGetStringFromObj(value1Ptr, &s1len); s2 = TclGetStringFromObj(value2Ptr, &s2len); } if (!nocase && checkEq && reqlength < 0) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to * worry about lexical LE/BE variance. */ memCmpFn = memcmp; } else { /* * As a catch-all we will work with UTF-8. We cannot use memcmp() * as that is unsafe with any string containing NUL (\xC0\x80 in * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if * we are case-sensitive and no specific length was requested. */ if ((reqlength < 0) && !nocase) { memCmpFn = (memCmpFn_t) TclpUtfNcmp2; } else { s1len = Tcl_NumUtfChars(s1, s1len); s2len = Tcl_NumUtfChars(s2, s2len); memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } } } /* At this point s1len, s2len, and reqlength should by now have been * adjusted so that they are all in the units expected by the selected * comparison function. */ length = (s1len < s2len) ? s1len : s2len; if (reqlength > 0 && reqlength < length) { length = reqlength; } else if (reqlength < 0) { /* * The requested length is negative, so ignore it by setting it to * length + 1 to correct the match var. */ reqlength = length + 1; } if (checkEq && reqlength < 0 && (s1len != s2len)) { match = 1; /* This will be reversed below. */ } else { /* * The comparison function should compare up to the minimum byte * length only. */ match = memCmpFn(s1, s2, length); |
︙ | ︙ |
Changes to tests/stringComp.test.
︙ | ︙ | |||
96 97 98 99 100 101 102 | {unicode} { string compare ab\u7266 ab\u7267 } {-1} {} {unicode} {string compare \334 \u00dc} 0 {} {unicode} {string compare \334 \u00fc} -1 {} {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {} {high bit} { | | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | {unicode} { string compare ab\u7266 ab\u7267 } {-1} {} {unicode} {string compare \334 \u00dc} 0 {} {unicode} {string compare \334 \u00fc} -1 {} {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {} {high bit} { # This test fails if the underlying comparison # is using signed chars instead of unsigned chars. # (like SunOS's default memcmp thus the compat/memcmp.c) string compare "\x80" "@" # Nb this tests works also in utf-8 space because \x80 is # translated into a 2 or more bytelength but whose first byte has # the high bit set. } {1} {} |
︙ | ︙ | |||
152 153 154 155 156 157 158 | } 0 {} {-nocase null strings} { string compare -nocase "" foo } -1 {} {-nocase null strings} { string compare -nocase foo "" } 1 {} | | | | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | } 0 {} {-nocase null strings} { string compare -nocase "" foo } -1 {} {-nocase null strings} { string compare -nocase foo "" } 1 {} {with length, unequal strings, partial first string} { string compare -length 2 abc abde } 0 {} {with length, unequal strings 2, full first string} { string compare -length 2 ab abde } 0 {} {with NUL character vs. other ASCII} { # Be careful here, since UTF-8 rep comparison with memcmp() of # these puts chars in the wrong order string compare \x00 \x01 } -1 {} |
︙ | ︙ |