Index: generic/tclCmdMZ.c ================================================================== --- generic/tclCmdMZ.c +++ generic/tclCmdMZ.c @@ -1174,95 +1174,35 @@ ClientData dummy, /* Not used. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *needleStr, *haystackStr; - int match, start, needleLen, haystackLen; + int start = 0; if (objc < 3 || objc > 4) { Tcl_WrongNumArgs(interp, 1, objv, "needleString haystackString ?startIndex?"); return TCL_ERROR; } - /* - * We are searching haystackStr for the sequence needleStr. - */ - - match = -1; - start = 0; - haystackLen = -1; - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - - if (objc == 4) { - /* - * If a startIndex is specified, we will need to fast forward to that - * point in the string before we think about a match. - */ - - if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, - &start) != TCL_OK){ - return TCL_ERROR; - } - - /* - * Reread to prevent shimmering problems. - */ - - needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); - haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); - - if (start >= haystackLen) { - goto str_first_done; - } else if (start > 0) { - haystackStr += start; - haystackLen -= start; - } else if (start < 0) { - /* - * Invalid start index mapped to string start; Bug #423581 - */ - - start = 0; - } - } - - /* - * If the length of the needle is more than the length of the haystack, it - * cannot be contained in there so we can avoid searching. [Bug 2960021] - */ - - if (needleLen > 0 && needleLen <= haystackLen) { - register Tcl_UniChar *p, *end; - - end = haystackStr + haystackLen - needleLen + 1; - for (p = haystackStr; p < end; p++) { - /* - * Scan forward to find the first character. - */ - - if ((*p == *needleStr) && (memcmp(needleStr, p, - sizeof(Tcl_UniChar) * (size_t)needleLen) == 0)) { - match = p - haystackStr; - break; - } - } - } - - /* - * Compute the character index of the matching string by counting the - * number of characters before the match. - */ - - if ((match != -1) && (objc == 4)) { - match += start; - } - - str_first_done: - Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + if (objc == 4) { + int size = Tcl_GetCharLength(objv[2]); + + if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { + return TCL_ERROR; + } + + if (start < 0) { + start = 0; + } + if (start >= size) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); + return TCL_OK; + } + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1], + objv[2], start))); return TCL_OK; } /* *---------------------------------------------------------------------- Index: generic/tclExecute.c ================================================================== --- generic/tclExecute.c +++ generic/tclExecute.c @@ -5718,10 +5718,13 @@ TRACE_WITH_OBJ(("%.20s %.20s %.20s => ", O2S(value2Ptr), O2S(value3Ptr), O2S(valuePtr)), objResultPtr); NEXT_INST_V(1, 3, 1); case INST_STR_FIND: +#if 1 + match = TclStringFind(OBJ_UNDER_TOS, OBJ_AT_TOS, 0); +#else ustring1 = Tcl_GetUnicodeFromObj(OBJ_AT_TOS, &length); /* Haystack */ ustring2 = Tcl_GetUnicodeFromObj(OBJ_UNDER_TOS, &length2);/* Needle */ match = -1; if (length2 > 0 && length2 <= length) { @@ -5732,10 +5735,11 @@ match = p - ustring1; break; } } } +#endif TRACE(("%.20s %.20s => %d\n", O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), match)); TclNewIntObj(objResultPtr, match); NEXT_INST_F(1, 2, 1); Index: generic/tclInt.h ================================================================== --- generic/tclInt.h +++ generic/tclInt.h @@ -3136,10 +3136,12 @@ MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); +MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, + unsigned int start); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, Tcl_Obj *patternObj, int flags); MODULE_SCOPE Tcl_Obj * TclStringObjReverse(Tcl_Obj *objPtr); Index: generic/tclStringObj.c ================================================================== --- generic/tclStringObj.c +++ generic/tclStringObj.c @@ -2835,10 +2835,107 @@ } } *objPtrPtr = objResultPtr; return TCL_OK; } + +/* + *--------------------------------------------------------------------------- + * + * TclStringFind -- + * + * Implements the [string first] operation. + * + * Results: + * If needle is found as a substring of haystack, the index of the + * first instance of such a find is returned. If needle is not present + * as a substring of haystack, -1 is returned. + * + * Side effects: + * needle and haystack may have their Tcl_ObjType changed. + * + *--------------------------------------------------------------------------- + */ + +int +TclStringFind( + Tcl_Obj *needle, + Tcl_Obj *haystack, + unsigned int start) +{ + int lh, ln = Tcl_GetCharLength(needle); + + if (ln == 0) { + /* + * We don't find empty substrings. Bizarre! + * + * TODO: When we one day make this a true substring + * finder, change this to "return 0" + */ + return -1; + } + + if (TclIsPureByteArray(needle) && TclIsPureByteArray(haystack)) { + unsigned char *end, *try, *bh; + unsigned char *bn = Tcl_GetByteArrayFromObj(needle, &ln); + + bh = Tcl_GetByteArrayFromObj(haystack, &lh); + end = bh + lh; + + try = bh + start; + while (try + ln <= end) { + try = memchr(try, bn[0], end - try); + + if (try == NULL) { + return -1; + } + if (0 == memcmp(try+1, bn+1, ln-1)) { + return (try - bh); + } + try++; + } + return -1; + } + + lh = Tcl_GetCharLength(haystack); + if (haystack->bytes && (lh == haystack->length)) { + /* haystack is all single-byte chars */ + + if (needle->bytes && (ln == needle->length)) { + /* needle is also all single-byte chars */ + char *found = strstr(haystack->bytes + start, needle->bytes); + + if (found) { + return (found - haystack->bytes); + } else { + return -1; + } + } else { + /* + * Cannot find substring with a multi-byte char inside + * a string with no multi-byte chars. + */ + return -1; + } + } else { + Tcl_UniChar *try, *end, *uh; + Tcl_UniChar *un = Tcl_GetUnicodeFromObj(needle, &ln); + + uh = Tcl_GetUnicodeFromObj(haystack, &lh); + end = uh + lh; + + try = uh + start; + while (try + ln <= end) { + if ((*try == *un) + && (0 == memcmp(try+1, un+1, (ln-1)*sizeof(Tcl_UniChar)))) { + return (try - uh); + } + try++; + } + return -1; + } +} /* *--------------------------------------------------------------------------- * * TclStringObjReverse --