Tcl Source Code

Check-in [cb43f6d65a]
Login
Bounty program for improvements to Tcl and certain Tcl packages.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:merge core-8-6-branch
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-branch
Files: files | file ages | folders
SHA3-256: cb43f6d65ac8a1066dbcbb2a2dc2c3532a8dab80011b3ba529bbcb8505c1d33a
User & Date: pooryorick 2018-05-07 14:33:15
Context
2018-05-08
15:00
merge 8.6 check-in: 714db5afb8 user: dgp tags: core-8-branch
2018-05-07
14:33
merge core-8-6-branch check-in: cb43f6d65a user: pooryorick tags: core-8-branch
07:43
Deduplicate code in INST_STR_CMP, StringCmpCmd, and StringEqualCmd. check-in: 1841bf54d1 user: pooryorick tags: core-8-6-branch
07:40
Remove some tip389 restrictions in test-cases, which are no longer necessary. Eliminate gcc compile... check-in: 385fda311b user: jan.nijtmans tags: core-8-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
{
    /*
     * Remember to keep code here in some sync with the byte-compiled versions
     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
     */

    const char *string1, *string2;
    int length1, length2, i, match, length, nocase = 0, reqlength = -1;
    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
    strCmpFn_t strCmpFn;

    if (objc < 3 || objc > 6) {
    str_cmp_args:
	Tcl_WrongNumArgs(interp, 1, objv,
		"?-nocase? ?-length int? string1 string2");
	return TCL_ERROR;
    }

    for (i = 1; i < objc-2; i++) {
	string2 = TclGetStringFromObj(objv[i], &length2);
	if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
	    nocase = 1;
	} else if ((length2 > 1)
		&& !strncmp(string2, "-length", (size_t)length2)) {
	    if (i+1 >= objc-2) {
		goto str_cmp_args;
	    }
	    i++;
	    if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
		return TCL_ERROR;
	    }






|
|
<
<









|
|

|
|







2542
2543
2544
2545
2546
2547
2548
2549
2550


2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
{
    /*
     * Remember to keep code here in some sync with the byte-compiled versions
     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
     */

    const char *string2;
    int length, i, match, nocase = 0, reqlength = -1;



    if (objc < 3 || objc > 6) {
    str_cmp_args:
	Tcl_WrongNumArgs(interp, 1, objv,
		"?-nocase? ?-length int? string1 string2");
	return TCL_ERROR;
    }

    for (i = 1; i < objc-2; i++) {
	string2 = TclGetStringFromObj(objv[i], &length);
	if ((length > 1) && !strncmp(string2, "-nocase", (size_t)length)) {
	    nocase = 1;
	} else if ((length > 1)
		&& !strncmp(string2, "-length", (size_t)length)) {
	    if (i+1 >= objc-2) {
		goto str_cmp_args;
	    }
	    i++;
	    if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
		return TCL_ERROR;
	    }
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
    /*
     * From now on, we only access the two objects at the end of the argument
     * array.
     */

    objv += objc-2;

    if ((reqlength == 0) || (objv[0] == objv[1])) {
	/*
	 * Always match at 0 chars of if it is the same obj.
	 */

	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
	return TCL_OK;
    }

    if (!nocase && TclIsPureByteArray(objv[0]) &&
	    TclIsPureByteArray(objv[1])) {
	/*
	 * Use binary versions of comparisons since that won't cause undue
	 * type conversions and it is much faster. Only do this if we're
	 * case-sensitive (which is all that really makes sense with byte
	 * arrays anyway, and we have no memcasecmp() for some reason... :^)
	 */

	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
	strCmpFn = (strCmpFn_t) memcmp;
    } else if ((objv[0]->typePtr == &tclStringType)
	    && (objv[1]->typePtr == &tclStringType)) {
	/*
	 * Do a unicode-specific comparison if both of the args are of String
	 * type. In benchmark testing this proved the most efficient check
	 * between the unicode and string comparison operations.
	 */

	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
	strCmpFn = (strCmpFn_t)
		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
    } else {
	/*
	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
	 * case-sensitive and no specific length was requested.
	 */

	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
	if ((reqlength < 0) && !nocase) {
	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
	} else {
	    length1 = Tcl_NumUtfChars(string1, length1);
	    length2 = Tcl_NumUtfChars(string2, length2);
	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
	}
    }

    if ((reqlength < 0) && (length1 != length2)) {
	match = 1;		/* This will be reversed below. */
    } else {
	length = (length1 < length2) ? length1 : length2;
	if (reqlength > 0 && reqlength < length) {
	    length = reqlength;
	} else if (reqlength < 0) {
	    /*
	     * The requested length is negative, so we ignore it by setting it
	     * to length + 1 so we correct the match var.
	     */

	    reqlength = length + 1;
	}

	match = strCmpFn(string1, string2, (unsigned) length);
	if ((match == 0) && (reqlength > length)) {
	    match = length1 - length2;
	}
    }

    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------






<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2582
2583
2584
2585
2586
2587
2588




2589



































































2590
2591
2592
2593
2594
2595
2596
    /*
     * From now on, we only access the two objects at the end of the argument
     * array.
     */

    objv += objc-2;





    match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);




































































    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
2692
2693
2694
2695
2696
2697
2698

2699


2700


2701





2702






2703






2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
{
    /*
     * Remember to keep code here in some sync with the byte-compiled versions
     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
     */


    const char *string1, *string2;


    int length1, length2, i, match, length, nocase = 0, reqlength = -1;


    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);





    strCmpFn_t strCmpFn;













    if (objc < 3 || objc > 6) {
    str_cmp_args:
	Tcl_WrongNumArgs(interp, 1, objv,
		"?-nocase? ?-length int? string1 string2");
	return TCL_ERROR;
    }

    for (i = 1; i < objc-2; i++) {
	string2 = TclGetStringFromObj(objv[i], &length2);
	if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
	    nocase = 1;
	} else if ((length2 > 1)
		&& !strncmp(string2, "-length", (size_t)length2)) {
	    if (i+1 >= objc-2) {
		goto str_cmp_args;
	    }
	    i++;
	    if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
		return TCL_ERROR;
	    }
	} else {
	    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "bad option \"%s\": must be -nocase or -length",
		    string2));
	    Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option",
		    string2, NULL);
	    return TCL_ERROR;
	}
    }

    /*
     * From now on, we only access the two objects at the end of the argument
     * array.
     */

    objv += objc-2;

    if ((reqlength == 0) || (objv[0] == objv[1])) {
	/*
	 * Always match at 0 chars of if it is the same obj.
	 */

	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
	return TCL_OK;
    }

    if (!nocase && TclIsPureByteArray(objv[0]) &&
	    TclIsPureByteArray(objv[1])) {
	/*
	 * Use binary versions of comparisons since that won't cause undue
	 * type conversions and it is much faster. Only do this if we're
	 * case-sensitive (which is all that really makes sense with byte
	 * arrays anyway, and we have no memcasecmp() for some reason... :^)
	 */

	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
	strCmpFn = (strCmpFn_t) memcmp;
    } else if ((objv[0]->typePtr == &tclStringType)
	    && (objv[1]->typePtr == &tclStringType)) {
	/*
	 * Do a unicode-specific comparison if both of the args are of String
	 * type. In benchmark testing this proved the most efficient check
	 * between the unicode and string comparison operations.
	 */

	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
	strCmpFn = (strCmpFn_t)
		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
    } else {
	/*
	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
	 * case-sensitive and no specific length was requested.
	 */

	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
	if ((reqlength < 0) && !nocase) {
	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
	} else {
	    length1 = Tcl_NumUtfChars(string1, length1);
	    length2 = Tcl_NumUtfChars(string2, length2);
	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
	}
    }

    length = (length1 < length2) ? length1 : length2;
    if (reqlength > 0 && reqlength < length) {
	length = reqlength;
    } else if (reqlength < 0) {
	/*
	 * The requested length is negative, so we ignore it by setting it to
	 * length + 1 so we correct the match var.
	 */

	reqlength = length + 1;
    }

    match = strCmpFn(string1, string2, (unsigned) length);
    if ((match == 0) && (reqlength > length)) {
	match = length1 - length2;
    }

    Tcl_SetObjResult(interp,
	    Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringCatCmd --






>
|
>
>
|
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
|
>
>
>
>
>
>








|
|
|
|
|




|





|

|



<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681















































































2682
2683
2684
2685
2686
2687
2688
{
    /*
     * Remember to keep code here in some sync with the byte-compiled versions
     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
     */

    int match, nocase, reqlength, status;

    if ((status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength))
	!= TCL_OK) {

	return status;
    }

    objv += objc-2;
    match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength);
    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
    return TCL_OK;
}

int TclStringCmpOpts (
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[],	/* Argument objects. */
    int *nocase,
    int *reqlength
)
{
    int i, length;
    const char *string;

    *reqlength = -1;
    *nocase = 0;
    if (objc < 3 || objc > 6) {
    str_cmp_args:
	Tcl_WrongNumArgs(interp, 1, objv,
		"?-nocase? ?-length int? string1 string2");
	return TCL_ERROR;
    }

    for (i = 1; i < objc-2; i++) {
	string = TclGetStringFromObj(objv[i], &length);
	if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) {
	    *nocase = 1;
	} else if ((length > 1)
		&& !strncmp(string, "-length", (size_t)length)) {
	    if (i+1 >= objc-2) {
		goto str_cmp_args;
	    }
	    i++;
	    if (TclGetIntFromObj(interp, objv[i], reqlength) != TCL_OK) {
		return TCL_ERROR;
	    }
	} else {
	    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "bad option \"%s\": must be -nocase or -length",
		    string));
	    Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option",
		    string, NULL);
	    return TCL_ERROR;
	}
    }















































































    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringCatCmd --

Changes to generic/tclExecute.c.

5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
    case INST_STR_EQ:
    case INST_STR_NEQ:		/* String (in)equality check */
    case INST_STR_CMP:		/* String compare. */
    stringCompare:
	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;

	if (valuePtr == value2Ptr) {
	    match = 0;
	} else {
	    /*
	     * We only need to check (in)equality when we have equal length
	     * strings.  We can use memcmp in all (n)eq cases because we
	     * don't need to worry about lexical LE/BE variance.
	     */

	    typedef int (*memCmpFn_t)(const void*, const void*, size_t);
	    memCmpFn_t memCmpFn;
	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));

	    if (TclIsPureByteArray(valuePtr)
		    && TclIsPureByteArray(value2Ptr)) {
		s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
		s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
		memCmpFn = memcmp;
	    } else if ((valuePtr->typePtr == &tclStringType)
		    && (value2Ptr->typePtr == &tclStringType)) {
		/*
		 * Do a unicode-specific comparison if both of the args are of
		 * String type. If the char length == byte length, we can do a
		 * memcmp. In benchmark testing this proved the most efficient
		 * check between the unicode and string comparison operations.
		 */

		s1len = Tcl_GetCharLength(valuePtr);
		s2len = Tcl_GetCharLength(value2Ptr);
		if ((s1len == valuePtr->length)
			&& (valuePtr->bytes != NULL)
			&& (s2len == value2Ptr->length)
			&& (value2Ptr->bytes != NULL)) {
		    s1 = valuePtr->bytes;
		    s2 = value2Ptr->bytes;
		    memCmpFn = memcmp;
		} else {
		    s1 = (char *) Tcl_GetUnicode(valuePtr);
		    s2 = (char *) Tcl_GetUnicode(value2Ptr);
		    if (
#ifdef WORDS_BIGENDIAN
			1
#else
			checkEq
#endif
			) {
			memCmpFn = memcmp;
			s1len *= sizeof(Tcl_UniChar);
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    } else {
		/*
		 * strcmp can't do a simple memcmp in order to handle the
		 * special Tcl \xC0\x80 null encoding for utf-8.
		 */

		s1 = TclGetStringFromObj(valuePtr, &s1len);
		s2 = TclGetStringFromObj(value2Ptr, &s2len);
		if (checkEq) {
		    memCmpFn = memcmp;
		} else {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		}
	    }

	    if (checkEq && (s1len != s2len)) {
		match = 1;
	    } else {
		/*
		 * The comparison function should compare up to the minimum
		 * byte length only.
		 */
		match = memCmpFn(s1, s2,
			(size_t) ((s1len < s2len) ? s1len : s2len));
		if (match == 0) {
		    match = s1len - s2len;
		}
	    }
	}

	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */







<
<
<
<
<
<
<
<
|
<
<


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<







5084
5085
5086
5087
5088
5089
5090








5091


5092
5093

























































5094











5095
5096
5097
5098
5099
5100
5101
    case INST_STR_EQ:
    case INST_STR_NEQ:		/* String (in)equality check */
    case INST_STR_CMP:		/* String compare. */
    stringCompare:
	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;









	{


	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));

























































	    match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, -1);











	}

	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */

5734
5735
5736
5737
5738
5739
5740








5741
5742
5743
5744
5745
5746
5747
    case INST_GT:
    case INST_LE:
    case INST_GE: {
	int iResult = 0, compare = 0;

	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;









	if (GetNumberFromObj(NULL, valuePtr, &ptr1, &type1) != TCL_OK
		|| GetNumberFromObj(NULL, value2Ptr, &ptr2, &type2) != TCL_OK) {
	    /*
	     * At least one non-numeric argument - compare as strings.
	     */







>
>
>
>
>
>
>
>







5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
5673
5674
5675
5676
5677
    case INST_GT:
    case INST_LE:
    case INST_GE: {
	int iResult = 0, compare = 0;

	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;

	/*
	    Try to determine, without triggering generation of a string
	    representation, whether one value is not a number.
	*/
	if (TclCheckEmptyString(valuePtr) > 0 || TclCheckEmptyString(value2Ptr) > 0) {
	    goto stringCompare;
	}

	if (GetNumberFromObj(NULL, valuePtr, &ptr1, &type1) != TCL_OK
		|| GetNumberFromObj(NULL, value2Ptr, &ptr2, &type2) != TCL_OK) {
	    /*
	     * At least one non-numeric argument - compare as strings.
	     */

Changes to generic/tclInt.h.

2756
2757
2758
2759
2760
2761
2762




2763
2764
2765
2766
2767
2768
2769
 * Pointer to a heap-allocated string of length zero that the Tcl core uses as
 * the value of an empty string representation for an object. This value is
 * shared by all new objects allocated by Tcl_NewObj.
 */

MODULE_SCOPE char	tclEmptyString;





/*
 *----------------------------------------------------------------
 * Procedures shared among Tcl modules but not used by the outside world,
 * introduced by/for NRE.
 *----------------------------------------------------------------
 */







>
>
>
>







2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
 * Pointer to a heap-allocated string of length zero that the Tcl core uses as
 * the value of an empty string representation for an object. This value is
 * shared by all new objects allocated by Tcl_NewObj.
 */

MODULE_SCOPE char	tclEmptyString;

enum CheckEmptyStringResult {
	TCL_EMPTYSTRING_UNKNOWN = -1, TCL_EMPTYSTRING_NO, TCL_EMPTYSTRING_YES
};

/*
 *----------------------------------------------------------------
 * Procedures shared among Tcl modules but not used by the outside world,
 * introduced by/for NRE.
 *----------------------------------------------------------------
 */

2896
2897
2898
2899
2900
2901
2902

2903
2904
2905
2906
2907
2908
2909
MODULE_SCOPE double	TclCeil(const mp_int *a);
MODULE_SCOPE void	TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void	TclChannelRelease(Tcl_Channel chan);
MODULE_SCOPE int	TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
			    Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int	TclCheckBadOctal(Tcl_Interp *interp,
			    const char *value);

MODULE_SCOPE int	TclChanCaughtErrorBypass(Tcl_Interp *interp,
			    Tcl_Channel chan);
MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd;
MODULE_SCOPE Tcl_NRPostProc TclClearRootEnsemble;
MODULE_SCOPE int	TclCompareTwoNumbers(Tcl_Obj *valuePtr,
			    Tcl_Obj *value2Ptr);
MODULE_SCOPE ContLineLoc *TclContinuationsEnter(Tcl_Obj *objPtr, int num,






>







2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
MODULE_SCOPE double	TclCeil(const mp_int *a);
MODULE_SCOPE void	TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void	TclChannelRelease(Tcl_Channel chan);
MODULE_SCOPE int	TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
			    Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int	TclCheckBadOctal(Tcl_Interp *interp,
			    const char *value);
MODULE_SCOPE int	TclCheckEmptyString(Tcl_Obj *objPtr);
MODULE_SCOPE int	TclChanCaughtErrorBypass(Tcl_Interp *interp,
			    Tcl_Channel chan);
MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd;
MODULE_SCOPE Tcl_NRPostProc TclClearRootEnsemble;
MODULE_SCOPE int	TclCompareTwoNumbers(Tcl_Obj *valuePtr,
			    Tcl_Obj *value2Ptr);
MODULE_SCOPE ContLineLoc *TclContinuationsEnter(Tcl_Obj *objPtr, int num,
3181
3182
3183
3184
3185
3186
3187






3188
3189
3190
3191
3192
3193
3194
			    Tcl_Obj *newValue, Tcl_Encoding encoding);
MODULE_SCOPE void	TclSignalExitThread(Tcl_ThreadId id, int result);
MODULE_SCOPE void	TclSpellFix(Tcl_Interp *interp,
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);






MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,
			    struct CompileEnv *envPtr);






>
>
>
>
>
>







3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
			    Tcl_Obj *newValue, Tcl_Encoding encoding);
MODULE_SCOPE void	TclSignalExitThread(Tcl_ThreadId id, int result);
MODULE_SCOPE void	TclSpellFix(Tcl_Interp *interp,
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);

typedef int (*memCmpFn_t)(const void*, const void*, size_t);
MODULE_SCOPE int	TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr,
			    int checkEq, int nocase, int reqlength);
MODULE_SCOPE int	TclStringCmpOpts (Tcl_Interp *interp, int objc, Tcl_Obj *const objv[],
			    int *nocase, int *reqlength);
MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,
			    struct CompileEnv *envPtr);
4536
4537
4538
4539
4540
4541
4542



4543
4544
4545
4546
4547
4548
4549
 * The ANSI C "prototype" for this macro is:
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);




/*
 *----------------------------------------------------------------
 * Macro used by the Tcl core to compare Unicode strings. On big-endian
 * systems we can use the more efficient memcmp, but this would not be
 * lexically correct on little-endian systems. The ANSI C "prototype" for
 * this macro is:






>
>
>







4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
 * The ANSI C "prototype" for this macro is:
 *
 * MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
 *----------------------------------------------------------------
 */

MODULE_SCOPE int	TclIsPureByteArray(Tcl_Obj *objPtr);
#define TclIsPureDict(objPtr) \
	(((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclDictType))


/*
 *----------------------------------------------------------------
 * Macro used by the Tcl core to compare Unicode strings. On big-endian
 * systems we can use the more efficient memcmp, but this would not be
 * lexically correct on little-endian systems. The ANSI C "prototype" for
 * this macro is:

Changes to generic/tclStringObj.c.

431
432
433
434
435
436
437

438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456












































457
458
459
460
461
462
463
    if (TclIsPureByteArray(objPtr)) {
	int length;

	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
	return length;
    }


    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);
    numChars = stringPtr->numChars;

    /*
     * If numChars is unknown, compute it.
     */

    if (numChars == -1) {
	TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
	stringPtr->numChars = numChars;
    }
    return numChars;
}













































/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. If index






>



















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
    if (TclIsPureByteArray(objPtr)) {
	int length;

	(void) Tcl_GetByteArrayFromObj(objPtr, &length);
	return length;
    }


    /*
     * OK, need to work with the object as a string.
     */

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);
    numChars = stringPtr->numChars;

    /*
     * If numChars is unknown, compute it.
     */

    if (numChars == -1) {
	TclNumUtfChars(numChars, objPtr->bytes, objPtr->length);
	stringPtr->numChars = numChars;
    }
    return numChars;
}



/*
 *----------------------------------------------------------------------
 *
 * TclCheckEmptyString --
 *
 *	Determine whether the string value of an object is or would be the
 *	empty string, without generating a string representation.
 *
 * Results:
 *	Returns 1 if empty, 0 if not, and -1 if unknown.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */
int
TclCheckEmptyString (
    Tcl_Obj *objPtr
) {
    int length = -1;

    if (objPtr->bytes == &tclEmptyString) {
	return TCL_EMPTYSTRING_YES;
    }

    if (TclListObjIsCanonical(objPtr)) {
	Tcl_ListObjLength(NULL, objPtr, &length);
	return length == 0;
    }

    if (TclIsPureDict(objPtr)) {
	Tcl_DictObjSize(NULL, objPtr, &length);
	return length == 0;
    }
    
    if (objPtr->bytes == NULL) {
	return TCL_EMPTYSTRING_UNKNOWN;
    }
    return objPtr->length == 0;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object. If index
3263
3264
3265
3266
3267
3268
3269















































































































































































3270
3271
3272
3273
3274
3275
3276
    if (interp) {
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
	Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
    }
    return NULL;
}
















































































































































































/*
 *---------------------------------------------------------------------------
 *
 * TclStringFirst --
 *
 *	Implements the [string first] operation.






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
    if (interp) {
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "max size for a Tcl value (%d bytes) exceeded", INT_MAX));
	Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL);
    }
    return NULL;
}

/*
 *---------------------------------------------------------------------------
 *
 * TclStringCmp --
 *	Compare two Tcl_Obj values as strings.
 *
 * Results:
 *	Like memcmp,  return -1, 0, or 1.
 *
 * Side effects:
 *	String representations may be generated.  Internal representation may
 *	be changed.
 *
 *---------------------------------------------------------------------------
 */

int TclStringCmp (
	Tcl_Obj *value1Ptr,
	Tcl_Obj *value2Ptr,
	int checkEq,        /* comparison is only for equality */
	int nocase,	    /* comparison is not case sensitive */
	int reqlength	    /* requested length */
) {
    char *s1, *s2;
    int empty, length, match, s1len, s2len;
    memCmpFn_t memCmpFn;

    if ((reqlength == 0) || (value1Ptr == value2Ptr)) {
	/*
	 * Always match at 0 chars of if it is the same obj.
	 */
	match = 0;
    } else {

	if (!nocase && TclIsPureByteArray(value1Ptr)
		&& TclIsPureByteArray(value2Ptr)) {
	    /*
	     * Use binary versions of comparisons since that won't cause undue
	     * type conversions and it is much faster. Only do this if we're
	     * case-sensitive (which is all that really makes sense with byte
	     * arrays anyway, and we have no memcasecmp() for some reason... :^)
	     */
	    s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
	    s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
	    memCmpFn = memcmp;
	} else if ((value1Ptr->typePtr == &tclStringType)
		&& (value2Ptr->typePtr == &tclStringType)) {
	    /*
	     * Do a unicode-specific comparison if both of the args are of
	     * String type. If the char length == byte length, we can do a
	     * memcmp. In benchmark testing this proved the most efficient
	     * check between the unicode and string comparison operations.
	     */

	    if (nocase) {
		s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len);
		s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len);
		memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp;
	    } else {
		s1len = Tcl_GetCharLength(value1Ptr);
		s2len = Tcl_GetCharLength(value2Ptr);
		if ((s1len == value1Ptr->length)
			&& (value1Ptr->bytes != NULL)
			&& (s2len == value2Ptr->length)
			&& (value2Ptr->bytes != NULL)) {
		    s1 = value1Ptr->bytes;
		    s2 = value2Ptr->bytes;
		    memCmpFn = memcmp;
		} else {
		    s1 = (char *) Tcl_GetUnicode(value1Ptr);
		    s2 = (char *) Tcl_GetUnicode(value2Ptr);
		    if (
#ifdef WORDS_BIGENDIAN
			1
#else
			checkEq
#endif
			) {
			memCmpFn = memcmp;
			s1len *= sizeof(Tcl_UniChar);
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    }
	} else {
	    if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
		switch (TclCheckEmptyString(value2Ptr)) {
		    case -1:
		    s1 = 0;
		    s1len = 0;
		    s2 = TclGetStringFromObj(value2Ptr, &s2len);
		    break;
		    case 0:
		    match = -1;
		    goto matchdone;
		    case 1:
		    match = 0;
		    goto matchdone;
		}
	    } else if (TclCheckEmptyString(value2Ptr) > 0) {
		switch (empty) {
		    case -1:
		    s2 = 0;
		    s2len = 0;
		    s1 = TclGetStringFromObj(value1Ptr, &s1len);
		    break;
		    case 0:
		    match = 1;
		    goto matchdone;
		    case 1:
		    match = 0;
		    goto matchdone;
		}
	    } else {
		s1 = TclGetStringFromObj(value1Ptr, &s1len);
		s2 = TclGetStringFromObj(value2Ptr, &s2len);
	    }
	    if (!nocase && checkEq) {
		/*
		 * When we have equal-length we can check only for (in)equality.
		 * We can use memcmp in all (n)eq cases because we
		 * don't need to worry about lexical LE/BE variance.
		 */
		memCmpFn = memcmp;
	    } else {

		/*
		 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
		 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
		 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
		 * case-sensitive and no specific length was requested.
		 */

		if ((reqlength < 0) && !nocase) {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		} else {
		    s1len = Tcl_NumUtfChars(s1, s1len);
		    s2len = Tcl_NumUtfChars(s2, s2len);
		    memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
		}
	    }
	}

	length = (s1len < s2len) ? s1len : s2len;
	if (reqlength > 0 && reqlength < length) {
	    length = reqlength;
	} else if (reqlength < 0) {
	    /*
	     * The requested length is negative, so we ignore it by setting it to
	     * length + 1 so we correct the match var.
	     */

	    reqlength = length + 1;
	}

	if (checkEq && (s1len != s2len)) {
	    match = 1;		/* This will be reversed below. */
	}  else {
	    /*
	     * The comparison function should compare up to the minimum
	     * byte length only.
	     */
	    match = memCmpFn(s1, s2, (size_t) length);
	}
	if ((match == 0) && (reqlength > length)) {
	    match = s1len - s2len;
	}
	match = (match > 0) ? 1 : (match < 0) ? -1 : 0;
    }
    matchdone:
    return match;
}

/*
 *---------------------------------------------------------------------------
 *
 * TclStringFirst --
 *
 *	Implements the [string first] operation.