Tcl Source Code

Check-in [9858a68816]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix issue [8f7fdea2d], string-2.20.1 fails on big endian, and also fix issues in TclStringCmp when checkEq is 1.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-6-branch
Files: files | file ages | folders
SHA3-256: 9858a68816f2053a9fbf94ecd6a6b8a36a8d69e661628a8353f9b589220d0a04
User & Date: pooryorick 2023-01-15 19:26:36
Original Comment: Fix issues in TclStringCmp when checkEq is 1, discovered with the help of rmax while diagnosing [8f7fdea2d].
References
2023-01-15
19:31 Ticket [8f7fdea2df] string-2.20.1 fails on big endian status still Open with 4 other changes artifact: 81332e4d39 user: pooryorick
Context
2023-01-22
01:33
Bug [e3dcab1d14] fix check-in: 783bacf316 user: kbk tags: core-8-6-branch
2023-01-19
21:36
Proposed fix for [3e8074aea7]: [interp limit time -seconds] has a y2k38 problem Closed-Leaf check-in: 504171c4e4 user: jan.nijtmans tags: bug-3e8074aea7
2023-01-18
16:54
Merge 8.6 check-in: 6dac80a981 user: jan.nijtmans tags: core-8-branch
2023-01-15
19:26
Fix issue [8f7fdea2d], string-2.20.1 fails on big endian, and also fix issues in TclStringCmp when ... check-in: 9858a68816 user: pooryorick tags: core-8-6-branch
2023-01-14
22:07
Properly quote contents of Make variables to pass through gdb.run file. check-in: 075c79a718 user: pooryorick tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639

    /*
     * From now on, we only access the two objects at the end of the argument
     * array.
     */

    objv += objc-2;
    match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength);
    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *







|







2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639

    /*
     * From now on, we only access the two objects at the end of the argument
     * array.
     */

    objv += objc-2;
    match = TclStringCmp(objv[0], objv[1], 1, nocase, reqlength);
    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713

int
TclStringCmp(
    Tcl_Obj *value1Ptr,
    Tcl_Obj *value2Ptr,
    int checkEq,		/* comparison is only for equality */
    int nocase,			/* comparison is not case sensitive */
    int reqlength)		/* requested length; -1 to compare whole
				 * strings */
{
    const char *s1, *s2;
    int empty, length, match, s1len, s2len;
    memCmpFn_t memCmpFn;

    if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
	/*







|
|







2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713

int
TclStringCmp(
    Tcl_Obj *value1Ptr,
    Tcl_Obj *value2Ptr,
    int checkEq,		/* comparison is only for equality */
    int nocase,			/* comparison is not case sensitive */
    int reqlength)		/* requested length in characters; -1 to
				 * compare whole strings */
{
    const char *s1, *s2;
    int empty, length, match, s1len, s2len;
    memCmpFn_t memCmpFn;

    if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
	/*
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750



2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766



2767
2768
2769
2770
2771
2772
2773

	s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
	s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
	memCmpFn = memcmp;
    } else if ((value1Ptr->typePtr == &tclStringType)
	    && (value2Ptr->typePtr == &tclStringType)) {
	/*
	 * Do a unicode-specific comparison if both of the args are of String
	 * type. If the char length == byte length, we can do a memcmp. In
	 * benchmark testing this proved the most efficient check between the
	 * unicode and string comparison operations.
	 */

	if (nocase) {
	    s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
	    s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
	    memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
	} else {
	    s1len = Tcl_GetCharLength(value1Ptr);
	    s2len = Tcl_GetCharLength(value2Ptr);
	    if ((s1len == value1Ptr->length)
		    && (value1Ptr->bytes != NULL)
		    && (s2len == value2Ptr->length)
		    && (value2Ptr->bytes != NULL)) {



		s1 = value1Ptr->bytes;
		s2 = value2Ptr->bytes;
		memCmpFn = memcmp;
	    } else {
		s1 = (char *) Tcl_GetUnicode(value1Ptr);
		s2 = (char *) Tcl_GetUnicode(value2Ptr);
		if (
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4)
			1
#else
			checkEq
#endif /* WORDS_BIGENDIAN */
		        ) {
		    memCmpFn = memcmp;
		    s1len *= sizeof(Tcl_UniChar);
		    s2len *= sizeof(Tcl_UniChar);



		} else {
		    memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		}
	    }
	}
    } else {
	/*







|


|













>
>
>








|

|

|



>
>
>







2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779

	s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
	s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
	memCmpFn = memcmp;
    } else if ((value1Ptr->typePtr == &tclStringType)
	    && (value2Ptr->typePtr == &tclStringType)) {
	/*
	 * Do a Unicode-specific comparison if both of the args are of String
	 * type. If the char length == byte length, we can do a memcmp. In
	 * benchmark testing this proved the most efficient check between the
	 * Unicode and string comparison operations.
	 */

	if (nocase) {
	    s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
	    s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
	    memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
	} else {
	    s1len = Tcl_GetCharLength(value1Ptr);
	    s2len = Tcl_GetCharLength(value2Ptr);
	    if ((s1len == value1Ptr->length)
		    && (value1Ptr->bytes != NULL)
		    && (s2len == value2Ptr->length)
		    && (value2Ptr->bytes != NULL)) {
		/* each byte represents one character so s1l3n, s2l3n, and
		 * reqlength are in both bytes and characters
		 */
		s1 = value1Ptr->bytes;
		s2 = value2Ptr->bytes;
		memCmpFn = memcmp;
	    } else {
		s1 = (char *) Tcl_GetUnicode(value1Ptr);
		s2 = (char *) Tcl_GetUnicode(value2Ptr);
		if (
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4)
		    1
#else
		    checkEq
#endif /* WORDS_BIGENDIAN */
		) {
		    memCmpFn = memcmp;
		    s1len *= sizeof(Tcl_UniChar);
		    s2len *= sizeof(Tcl_UniChar);
		    if (reqlength > 0) {
			reqlength *= sizeof(Tcl_UniChar);
		    }
		} else {
		    memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		}
	    }
	}
    } else {
	/*
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832





2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
		return 0;
	    }
	} else {
	    s1 = TclGetStringFromObj(value1Ptr, &s1len);
	    s2 = TclGetStringFromObj(value2Ptr, &s2len);
	}

	if (!nocase && checkEq) {
	    /*
	     * When we have equal-length we can check only for (in)equality.
	     * We can use memcmp() in all (n)eq cases because we don't need to
	     * worry about lexical LE/BE variance.
	     */
	    memCmpFn = memcmp;
	} else {
	    /*
	     * As a catch-all we will work with UTF-8. We cannot use memcmp()
	     * as that is unsafe with any string containing NUL (\xC0\x80 in
	     * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if
	     * we are case-sensitive and no specific length was requested.
	     */

	    if ((reqlength < 0) && !nocase) {
		memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
	    } else {
		s1len = Tcl_NumUtfChars(s1, s1len);
		s2len = Tcl_NumUtfChars(s2, s2len);
		memCmpFn = (memCmpFn_t)
			(nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
	    }
	}
    }






    length = (s1len < s2len) ? s1len : s2len;
    if (reqlength > 0 && reqlength < length) {
	length = reqlength;
    } else if (reqlength < 0) {
	/*
	 * The requested length is negative, so we ignore it by setting it to
	 * length + 1 so we correct the match var.
	 */

	reqlength = length + 1;
    }

    if (checkEq && (s1len != s2len)) {
	match = 1;		/* This will be reversed below. */
    } else {
	/*
	 * The comparison function should compare up to the minimum byte
	 * length only.
	 */
	match = memCmpFn(s1, s2, length);







|




















|



>
>
>
>
>






|
|

<



|







2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852

2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
		return 0;
	    }
	} else {
	    s1 = TclGetStringFromObj(value1Ptr, &s1len);
	    s2 = TclGetStringFromObj(value2Ptr, &s2len);
	}

	if (!nocase && checkEq && reqlength < 0) {
	    /*
	     * When we have equal-length we can check only for (in)equality.
	     * We can use memcmp() in all (n)eq cases because we don't need to
	     * worry about lexical LE/BE variance.
	     */
	    memCmpFn = memcmp;
	} else {
	    /*
	     * As a catch-all we will work with UTF-8. We cannot use memcmp()
	     * as that is unsafe with any string containing NUL (\xC0\x80 in
	     * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if
	     * we are case-sensitive and no specific length was requested.
	     */

	    if ((reqlength < 0) && !nocase) {
		memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
	    } else {
		s1len = Tcl_NumUtfChars(s1, s1len);
		s2len = Tcl_NumUtfChars(s2, s2len);
		memCmpFn = (memCmpFn_t)
		    (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
	    }
	}
    }

    /* At this point s1len, s2len, and reqlength should by now have been
     * adjusted so that they are all in the units expected by the selected
     * comparison function.
     */

    length = (s1len < s2len) ? s1len : s2len;
    if (reqlength > 0 && reqlength < length) {
	length = reqlength;
    } else if (reqlength < 0) {
	/*
	 * The requested length is negative, so ignore it by setting it to
	 * length + 1 to correct the match var.
	 */

	reqlength = length + 1;
    }

    if (checkEq && reqlength < 0 && (s1len != s2len)) {
	match = 1;		/* This will be reversed below. */
    } else {
	/*
	 * The comparison function should compare up to the minimum byte
	 * length only.
	 */
	match = memCmpFn(s1, s2, length);

Changes to tests/stringComp.test.

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    {unicode} {
	string compare ab\u7266 ab\u7267
    } {-1} {}
    {unicode} {string compare \334 \u00dc} 0 {}
    {unicode} {string compare \334 \u00fc} -1 {}
    {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {}
    {high bit} {
	# This test will fail if the underlying comparison
	# is using signed chars instead of unsigned chars.
	# (like SunOS's default memcmp thus the compat/memcmp.c)
	string compare "\x80" "@"
	# Nb this tests works also in utf-8 space because \x80 is
	# translated into a 2 or more bytelength but whose first byte has
	# the high bit set.
    } {1} {}







|







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    {unicode} {
	string compare ab\u7266 ab\u7267
    } {-1} {}
    {unicode} {string compare \334 \u00dc} 0 {}
    {unicode} {string compare \334 \u00fc} -1 {}
    {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {}
    {high bit} {
	# This test fails if the underlying comparison
	# is using signed chars instead of unsigned chars.
	# (like SunOS's default memcmp thus the compat/memcmp.c)
	string compare "\x80" "@"
	# Nb this tests works also in utf-8 space because \x80 is
	# translated into a 2 or more bytelength but whose first byte has
	# the high bit set.
    } {1} {}
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    } 0 {}
    {-nocase null strings} {
	string compare -nocase "" foo
    } -1 {}
    {-nocase null strings} {
	string compare -nocase foo ""
    } 1 {}
    {with length, unequal strings} {
	string compare -length 2 abc abde
    } 0 {}
    {with length, unequal strings} {
	string compare -length 2 ab abde
    } 0 {}
    {with NUL character vs. other ASCII} {
	# Be careful here, since UTF-8 rep comparison with memcmp() of
	# these puts chars in the wrong order
	string compare \x00 \x01
    } -1 {}







|


|







152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    } 0 {}
    {-nocase null strings} {
	string compare -nocase "" foo
    } -1 {}
    {-nocase null strings} {
	string compare -nocase foo ""
    } 1 {}
    {with length, unequal strings, partial first string} {
	string compare -length 2 abc abde
    } 0 {}
    {with length, unequal strings 2, full first string} {
	string compare -length 2 ab abde
    } 0 {}
    {with NUL character vs. other ASCII} {
	# Be careful here, since UTF-8 rep comparison with memcmp() of
	# these puts chars in the wrong order
	string compare \x00 \x01
    } -1 {}