Tcl Source Code

Changes On Branch dgp-string-find
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch dgp-string-find Excluding Merge-Ins

This is equivalent to a diff from 24d9077003 to e35f61d57e

2016-11-07
20:11
Refactor to channel all [string first] functions through a single implementation. check-in: 5fd1a339d3 user: dgp tags: trunk
20:04
Optimize case of all single-byte chars. Closed-Leaf check-in: e35f61d57e user: dgp tags: dgp-string-find
19:41
Consolidate the "find empty string" cases. check-in: 9bc07dee24 user: dgp tags: dgp-string-find
2016-11-04
21:29
First draft refactoring the [string first] functionality. check-in: 5073110f26 user: dgp tags: dgp-string-find
15:52
merge trunk check-in: 16c449a79f user: dgp tags: tip-445
14:56
[824752f10e] Use LIST_MAX instead of INT_MAX for unknown number of elements to the end of the list. check-in: 24d9077003 user: dgp tags: trunk
14:54
[8245752f10e] Use LIST_MAX instead of INT_MAX for unknown number of elements to the end of the list. check-in: 5983fb59e6 user: dgp tags: core-8-6-branch
14:49
merge mark check-in: 9cd4c5200b user: dgp tags: trunk

Changes to generic/tclCmdMZ.c.

1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203

1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263

1264
1265
1266
1267
1268
1269
1270
static int
StringFirstCmd(
    ClientData dummy,		/* Not used. */
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_UniChar *needleStr, *haystackStr;
    int match, start, needleLen, haystackLen;

    if (objc < 3 || objc > 4) {
	Tcl_WrongNumArgs(interp, 1, objv,
		"needleString haystackString ?startIndex?");
	return TCL_ERROR;
    }

    /*
     * We are searching haystackStr for the sequence needleStr.
     */

    match = -1;
    start = 0;
    haystackLen = -1;

    needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
    haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);

    if (objc == 4) {
	/*
	 * If a startIndex is specified, we will need to fast forward to that
	 * point in the string before we think about a match.
	 */


	if (TclGetIntForIndexM(interp, objv[3], haystackLen-1,
		&start) != TCL_OK){
	    return TCL_ERROR;
	}

	/*
	 * Reread to prevent shimmering problems.
	 */

	needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen);
	haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen);

	if (start >= haystackLen) {
	    goto str_first_done;
	} else if (start > 0) {
	    haystackStr += start;
	    haystackLen -= start;
	} else if (start < 0) {
	    /*
	     * Invalid start index mapped to string start; Bug #423581
	     */

	    start = 0;
	}
    }

    /*
     * If the length of the needle is more than the length of the haystack, it
     * cannot be contained in there so we can avoid searching. [Bug 2960021]
     */

    if (needleLen > 0 && needleLen <= haystackLen) {
	register Tcl_UniChar *p, *end;

	end = haystackStr + haystackLen - needleLen + 1;
	for (p = haystackStr;  p < end;  p++) {
	    /*
	     * Scan forward to find the first character.
	     */

	    if ((*p == *needleStr) && (memcmp(needleStr, p,
		    sizeof(Tcl_UniChar) * (size_t)needleLen) == 0)) {
		match = p - haystackStr;
		break;
	    }
	}
    }

    /*
     * Compute the character index of the matching string by counting the
     * number of characters before the match.
     */

    if ((match != -1) && (objc == 4)) {
	match += start;
    }

  str_first_done:
    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));

    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringLastCmd --







<
|







<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
>

|
<



<
<
<
<
<
<
<
|
<
|
<
<
<
<
<
<
|
|
<
<
|
<
<
<
<
|
<
<
|
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
>







1172
1173
1174
1175
1176
1177
1178

1179
1180
1181
1182
1183
1184
1185
1186











1187




1188
1189
1190

1191
1192
1193







1194

1195






1196
1197


1198




1199


1200





1201


















1202
1203
1204
1205
1206
1207
1208
1209
1210
static int
StringFirstCmd(
    ClientData dummy,		/* Not used. */
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{

    int start = 0;

    if (objc < 3 || objc > 4) {
	Tcl_WrongNumArgs(interp, 1, objv,
		"needleString haystackString ?startIndex?");
	return TCL_ERROR;
    }












    if (objc == 4) {




	int size = Tcl_GetCharLength(objv[2]);

	if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) {

	    return TCL_ERROR;
	}








	if (start < 0) {

	    start = 0;






	}
	if (start >= size) {


	    Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));




	    return TCL_OK;


	}





    }


















    Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1],
	    objv[2], start)));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringLastCmd --

Changes to generic/tclExecute.c.

5716
5717
5718
5719
5720
5721
5722



5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736

5737
5738
5739
5740
5741
5742
5743
	}
    doneStringMap:
	TRACE_WITH_OBJ(("%.20s %.20s %.20s => ",
		O2S(value2Ptr), O2S(value3Ptr), O2S(valuePtr)), objResultPtr);
	NEXT_INST_V(1, 3, 1);

    case INST_STR_FIND:



	ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length);	/* Haystack */
	ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */

	match = -1;
	if (length2 > 0 && length2 <= length) {
	    end = ustring1 + length - length2 + 1;
	    for (p=ustring1 ; p<end ; p++) {
		if ((*p == *ustring2) &&
			memcmp(ustring2,p,sizeof(Tcl_UniChar)*length2) == 0) {
		    match = p - ustring1;
		    break;
		}
	    }
	}


	TRACE(("%.20s %.20s => %d\n",
		O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
	TclNewIntObj(objResultPtr, match);
	NEXT_INST_F(1, 2, 1);

    case INST_STR_FIND_LAST:







>
>
>














>







5716
5717
5718
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
5747
	}
    doneStringMap:
	TRACE_WITH_OBJ(("%.20s %.20s %.20s => ",
		O2S(value2Ptr), O2S(value3Ptr), O2S(valuePtr)), objResultPtr);
	NEXT_INST_V(1, 3, 1);

    case INST_STR_FIND:
#if 1
	match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0);
#else
	ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length);	/* Haystack */
	ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */

	match = -1;
	if (length2 > 0 && length2 <= length) {
	    end = ustring1 + length - length2 + 1;
	    for (p=ustring1 ; p<end ; p++) {
		if ((*p == *ustring2) &&
			memcmp(ustring2,p,sizeof(Tcl_UniChar)*length2) == 0) {
		    match = p - ustring1;
		    break;
		}
	    }
	}
#endif

	TRACE(("%.20s %.20s => %d\n",
		O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match));
	TclNewIntObj(objResultPtr, match);
	NEXT_INST_F(1, 2, 1);

    case INST_STR_FIND_LAST:

Changes to generic/tclInt.h.

3134
3135
3136
3137
3138
3139
3140


3141
3142
3143
3144
3145
3146
3147
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);
MODULE_SCOPE int	TclStringCatObjv(Tcl_Interp *interp, int inPlace,
			    int objc, Tcl_Obj *const objv[],
			    Tcl_Obj **objPtrPtr);


MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,







>
>







3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
			    Tcl_Obj *const *objv, int objc, int subIdx,
			    Tcl_Obj *bad, Tcl_Obj *fix);
MODULE_SCOPE void *	TclStackRealloc(Tcl_Interp *interp, void *ptr,
			    int numBytes);
MODULE_SCOPE int	TclStringCatObjv(Tcl_Interp *interp, int inPlace,
			    int objc, Tcl_Obj *const objv[],
			    Tcl_Obj **objPtrPtr);
MODULE_SCOPE int	TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack,
			    unsigned int start);
MODULE_SCOPE int	TclStringMatch(const char *str, int strLen,
			    const char *pattern, int ptnLen, int flags);
MODULE_SCOPE int	TclStringMatchObj(Tcl_Obj *stringObj,
			    Tcl_Obj *patternObj, int flags);
MODULE_SCOPE Tcl_Obj *	TclStringObjReverse(Tcl_Obj *objPtr);
MODULE_SCOPE void	TclSubstCompile(Tcl_Interp *interp, const char *bytes,
			    int numBytes, int flags, int line,

Changes to generic/tclStringObj.c.

2833
2834
2835
2836
2837
2838
2839

































































































2840
2841
2842
2843
2844
2845
2846
		dst += more;
	    }
	}
    }
    *objPtrPtr = objResultPtr;
    return TCL_OK;
}


































































































/*
 *---------------------------------------------------------------------------
 *
 * TclStringObjReverse --
 *
 *	Implements the [string reverse] operation.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
		dst += more;
	    }
	}
    }
    *objPtrPtr = objResultPtr;
    return TCL_OK;
}

/*
 *---------------------------------------------------------------------------
 *
 * TclStringFind --
 *
 *	Implements the [string first] operation.
 *
 * Results:
 *	If needle is found as a substring of haystack, the index of the
 *	first instance of such a find is returned.  If needle is not present
 *	as a substring of haystack, -1 is returned.
 *
 * Side effects:
 *	needle and haystack may have their Tcl_ObjType changed.
 *
 *---------------------------------------------------------------------------
 */

int
TclStringFind(
    Tcl_Obj *needle,
    Tcl_Obj *haystack,
    unsigned int start)
{
    int lh, ln = Tcl_GetCharLength(needle);

    if (ln == 0) {
	/*
	 * 	We don't find empty substrings.  Bizarre!
	 *
	 * 	TODO: When we one day make this a true substring
	 * 	finder, change this to "return 0"
	 */
	return -1;
    }

    if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) {
	unsigned char *end, *try, *bh;
	unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln);

	bh = Tcl_GetByteArrayFromObj(haystack, &lh);
	end = bh + lh;

	try = bh + start;
	while (try + ln <= end) {
	    try = memchr(try, bn[0], end - try);

	    if (try == NULL) {
		return -1;
	    }
	    if (0 == memcmp(try+1, bn+1, ln-1)) {
		return (try - bh);
	    }
	    try++;
	}
	return -1;
    }

    lh = Tcl_GetCharLength(haystack);
    if (haystack->bytes && (lh == haystack->length)) {
	/* haystack is all single-byte chars */

	if (needle->bytes && (ln == needle->length)) {
	    /* needle is also all single-byte chars */
	    char *found = strstr(haystack->bytes + start, needle->bytes);

	    if (found) {
		return (found - haystack->bytes);
	    } else {
		return -1;
	    }
	} else {
	    /*
	     * Cannot find substring with a multi-byte char inside
	     * a string with no multi-byte chars.
	     */
	    return -1;
	}
    } else {
	Tcl_UniChar *try, *end, *uh;
	Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln);

	uh = Tcl_GetUnicodeFromObj(haystack, &lh);
	end = uh + lh;

	try = uh + start;
	while (try + ln <= end) {
	    if ((*try == *un)
		    && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) {
		return (try - uh);
	    }
	    try++;
	}
	return -1;
    }
}

/*
 *---------------------------------------------------------------------------
 *
 * TclStringObjReverse --
 *
 *	Implements the [string reverse] operation.