Tcl Source Code

Check-in [e423459fce]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Preparation to deduplicate code between byte-compiled and legacy implementations of [string compare].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-6-branch
Files: files | file ages | folders
SHA3-256: e423459fceeed3325c694aca0fff4d5c78e3b069d0e2a1a7da752115a8a93e55
User & Date: pooryorick 2018-05-06 13:45:21
Context
2018-05-06
18:13
Factor options handling out of StringCmpCmd. check-in: 88e60c5289 user: pooryorick tags: core-8-6-branch
13:45
Preparation to deduplicate code between byte-compiled and legacy implementations of [string compare]... check-in: e423459fce user: pooryorick tags: core-8-6-branch
2018-05-05
17:04
Avoid generating string representation when comparing the empty string. check-in: 44527c632e user: pooryorick tags: core-8-6-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

2860
2861
2862
2863
2864
2865
2866


























































































































2867
2868
2869
2870
2871
2872
2873
	match = length1 - length2;
    }

    Tcl_SetObjResult(interp,
	    Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
    return TCL_OK;
}


























































































































 
/*
 *----------------------------------------------------------------------
 *
 * StringCatCmd --
 *
 *	This procedure is invoked to process the "string cat" Tcl command.






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
	match = length1 - length2;
    }

    Tcl_SetObjResult(interp,
	    Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
    return TCL_OK;
}
 
int TclStringCmp (
	Tcl_Obj *value1Ptr,
	Tcl_Obj *value2Ptr,
	int checkEq
) {
	char *s1, *s2;
	int empty, match, s1len, s2len;
	memCmpFn_t memCmpFn;

	/*
	 * When we have equal-length we can check only for (in)equality.
	 * We can use memcmp in all (n)eq cases because we
	 * don't need to worry about lexical LE/BE variance.
	 */

	if (value1Ptr == value2Ptr) {
	    match = 0;
	} else {
	    if (TclIsPureByteArray(value1Ptr)
		    && TclIsPureByteArray(value2Ptr)) {
		s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len);
		s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
		memCmpFn = memcmp;
	    } else if ((value1Ptr->typePtr == &tclStringType)
		    && (value2Ptr->typePtr == &tclStringType)) {
		/*
		 * Do a unicode-specific comparison if both of the args are of
		 * String type. If the char length == byte length, we can do a
		 * memcmp. In benchmark testing this proved the most efficient
		 * check between the unicode and string comparison operations.
		 */

		s1len = Tcl_GetCharLength(value1Ptr);
		s2len = Tcl_GetCharLength(value2Ptr);
		if ((s1len == value1Ptr->length)
			&& (value1Ptr->bytes != NULL)
			&& (s2len == value2Ptr->length)
			&& (value2Ptr->bytes != NULL)) {
		    s1 = value1Ptr->bytes;
		    s2 = value2Ptr->bytes;
		    memCmpFn = memcmp;
		} else {
		    s1 = (char *) Tcl_GetUnicode(value1Ptr);
		    s2 = (char *) Tcl_GetUnicode(value2Ptr);
		    if (
#ifdef WORDS_BIGENDIAN
			1
#else
			checkEq
#endif
			) {
			memCmpFn = memcmp;
			s1len *= sizeof(Tcl_UniChar);
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    } else {
		/*
		 * In order to handle the special Tcl \xC0\x80 null encoding
		 * for utf-8, strcmp can't do a simple memcmp.
		 */

		if ((empty = TclCheckEmptyString(value1Ptr)) > 0) {
		    switch (TclCheckEmptyString(value2Ptr)) {
			case -1:
			s1 = "";
			s1len = 0;
			s2 = TclGetStringFromObj(value2Ptr, &s2len);
			break;
			case 0:
			match = -1;
			goto matchdone;
			case 1:
			match = 0;
			goto matchdone;
		    }
		} else if (TclCheckEmptyString(value2Ptr) > 0) {
		    switch (empty) {
			case -1:
			s2 = "";
			s2len = 0;
			s1 = TclGetStringFromObj(value1Ptr, &s1len);
			break;
			case 0:
			match = 1;
			goto matchdone;
			case 1:
			match = 0;
			goto matchdone;
		    }
		} else {
		    s1 = TclGetStringFromObj(value1Ptr, &s1len);
		    s2 = TclGetStringFromObj(value2Ptr, &s2len);
		}

		if (checkEq) {
		    memCmpFn = memcmp;
		} else {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		}
	    }

	    if (checkEq && (s1len != s2len)) {
		match = 1;
	    }  else {
		/*
		 * The comparison function should compare up to the minimum
		 * byte length only.
		 */
		match = memCmpFn(s1, s2,
			(size_t) ((s1len < s2len) ? s1len : s2len));
		if (match == 0) {
		    match = s1len - s2len;
		}
	    }
	}
    matchdone:
    return match;
}
 
/*
 *----------------------------------------------------------------------
 *
 * StringCatCmd --
 *
 *	This procedure is invoked to process the "string cat" Tcl command.

Changes to generic/tclExecute.c.

5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399

5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
    case INST_STR_EQ:
    case INST_STR_NEQ:		/* String (in)equality check */
    case INST_STR_CMP:		/* String compare. */
    stringCompare:
	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;

	if (valuePtr == value2Ptr) {
	    match = 0;
	} else {
	    /*
	     * We only need to check (in)equality when we have equal length
	     * strings.  We can use memcmp in all (n)eq cases because we
	     * don't need to worry about lexical LE/BE variance.
	     */

	    typedef int (*memCmpFn_t)(const void*, const void*, size_t);
	    memCmpFn_t memCmpFn;
	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));


	    if (TclIsPureByteArray(valuePtr)
		    && TclIsPureByteArray(value2Ptr)) {
		s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len);
		s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len);
		memCmpFn = memcmp;
	    } else if ((valuePtr->typePtr == &tclStringType)
		    && (value2Ptr->typePtr == &tclStringType)) {
		/*
		 * Do a unicode-specific comparison if both of the args are of
		 * String type. If the char length == byte length, we can do a
		 * memcmp. In benchmark testing this proved the most efficient
		 * check between the unicode and string comparison operations.
		 */

		s1len = Tcl_GetCharLength(valuePtr);
		s2len = Tcl_GetCharLength(value2Ptr);
		if ((s1len == valuePtr->length)
			&& (valuePtr->bytes != NULL)
			&& (s2len == value2Ptr->length)
			&& (value2Ptr->bytes != NULL)) {
		    s1 = valuePtr->bytes;
		    s2 = value2Ptr->bytes;
		    memCmpFn = memcmp;
		} else {
		    s1 = (char *) Tcl_GetUnicode(valuePtr);
		    s2 = (char *) Tcl_GetUnicode(value2Ptr);
		    if (
#ifdef WORDS_BIGENDIAN
			1
#else
			checkEq
#endif
			) {
			memCmpFn = memcmp;
			s1len *= sizeof(Tcl_UniChar);
			s2len *= sizeof(Tcl_UniChar);
		    } else {
			memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp;
		    }
		}
	    } else {
		/*
		 * In order to handle the special Tcl \xC0\x80 null encoding
		 * for utf-8, strcmp can't do a simple memcmp.
		 */

		{
		    int empty;
		    if ((empty = TclCheckEmptyString(valuePtr)) > 0) {
			switch (TclCheckEmptyString(value2Ptr)) {
			    case -1:
				s1 = "";
				s1len = 0;
				s2 = TclGetStringFromObj(value2Ptr, &s2len);
				break;
			    case 0:
				match = -1;
				goto matchdone;
			    case 1:
				match = 0;
				goto matchdone;
			}
		    } else if (TclCheckEmptyString(value2Ptr) > 0) {
			switch (empty) {
			    case -1:
				s2 = "";
				s2len = 0;
				s1 = TclGetStringFromObj(valuePtr, &s1len);
				break;
			    case 0:
				match = 1;
				goto matchdone;
			    case 1:
				match = 0;
				goto matchdone;
			}
		    } else {
			s1 = TclGetStringFromObj(valuePtr, &s1len);
			s2 = TclGetStringFromObj(value2Ptr, &s2len);
		    }
		}


		if (checkEq) {
		    memCmpFn = memcmp;
		} else {
		    memCmpFn = (memCmpFn_t) TclpUtfNcmp2;
		}

	    }

	    if (checkEq && (s1len != s2len)) {
		match = 1;
	    }  else {
		/*
		 * The comparison function should compare up to the minimum
		 * byte length only.
		 */
		match = memCmpFn(s1, s2,
			(size_t) ((s1len < s2len) ? s1len : s2len));
		if (match == 0) {
		    match = s1len - s2len;
		}
	    }
	}

	matchdone:

	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */

	if (*pc != INST_STR_CMP) {
	    /*






<
<
<
<
<
<
<
<
|
<
<


>
|
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







5380
5381
5382
5383
5384
5385
5386








5387


5388
5389
5390
5391













5392






























































































5393
5394
5395
5396
5397
5398
5399
    case INST_STR_EQ:
    case INST_STR_NEQ:		/* String (in)equality check */
    case INST_STR_CMP:		/* String compare. */
    stringCompare:
	value2Ptr = OBJ_AT_TOS;
	valuePtr = OBJ_UNDER_TOS;









	{


	    int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ)
		    || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ));
	    match = TclStringCmp(valuePtr, value2Ptr, checkEq);
	}












































































































	/*
	 * Make sure only -1,0,1 is returned
	 * TODO: consider peephole opt.
	 */

	if (*pc != INST_STR_CMP) {
	    /*

Changes to generic/tclInt.h.

3151
3152
3153
3154
3155
3156
3157





3158
3159
3160
3161
3162
3163
3164
			    Tcl_Obj *newValue, Tcl_Encoding encoding);
MODULE_SCOPE void	TclSignalExitThread(Tcl_ThreadId id, int result);
MODULE_SCOPE void	TclSpellFix(Tcl_Interp *interp,
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);





MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,






>
>
>
>
>







3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
			    Tcl_Obj *newValue, Tcl_Encoding encoding);
MODULE_SCOPE void	TclSignalExitThread(Tcl_ThreadId id, int result);
MODULE_SCOPE void	TclSpellFix(Tcl_Interp *interp,
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);

typedef int (*memCmpFn_t)(const void*, const void*, size_t);
MODULE_SCOPE int	TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr,
			    int checkEq);

MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,