Tcl Source Code

Check-in [66c30c4369]
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Tcl 2019 Conference, Houston/TX, US, Nov 4-8
Send your abstracts to [email protected]
or submit via the online form by Sep 9.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Proposed solution for 3613609: lsort -nocase does not sort non-ASCII correctly
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | bug-3613609
Files: files | file ages | folders
SHA1: 66c30c43690c5f6d5cc5776422f654515dde88b7
User & Date: jan.nijtmans 2013-05-21 09:27:15
Context
2013-05-21
09:38
Slight improvement: if cs = "\xC0\x80" and ct = "\x00", loop would continue after NUL-byte, this sho... check-in: a765f37f78 user: jan.nijtmans tags: bug-3613609
09:27
Proposed solution for 3613609: lsort -nocase does not sort non-ASCII correctly check-in: 66c30c4369 user: jan.nijtmans tags: bug-3613609
2013-05-19
20:26
Don't #define VOID on VxWorks, as it is already typdef'd to void. Eliminate possibly conflicting LOC... check-in: 157acf6411 user: jan.nijtmans tags: core-8-5-branch
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdIL.c.

2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
....
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
....
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
....
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
....
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;
................................................................................
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/strcasecmp.
			 */

			if (noCase) {
			    match = (strcasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;

................................................................................
	} else if (sortMode == SORTMODE_INTEGER) {
	    long a;
	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.intValue = a;
	} else if (sortInfo.sortMode == SORTMODE_REAL) {
	    double a;
	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.doubleValue = a;
	} else {
................................................................................
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    ckfree((char *)elementArray);

  done:
    if (sortInfo.sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
    if (sortInfo.indexc > 1) {
	ckfree((char *) sortInfo.indexv);
    }
................................................................................
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = strcasecmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;







|







 







|







 







|







 







|







 







|







2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
....
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
....
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
....
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
....
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
	case LSEARCH_INLINE:		/* -inline */
	    inlineReturn = 1;
	    break;
	case LSEARCH_INTEGER:		/* -integer */
	    dataType = INTEGER;
	    break;
	case LSEARCH_NOCASE:		/* -nocase */
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;
	case LSEARCH_NOT:		/* -not */
	    negatedMatch = 1;
	    break;
	case LSEARCH_REAL:		/* -real */
	    dataType = REAL;
................................................................................
		    if (length == elemLen) {
			/*
			 * This split allows for more optimal compilation of
			 * memcmp/strcasecmp.
			 */

			if (noCase) {
			    match = (TclUtfCasecmp(bytes, patternBytes) == 0);
			} else {
			    match = (memcmp(bytes, patternBytes,
				    (size_t) length) == 0);
			}
		    }
		    break;

................................................................................
	} else if (sortMode == SORTMODE_INTEGER) {
	    long a;
	    if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.intValue = a;
	} else if (sortMode == SORTMODE_REAL) {
	    double a;
	    if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) {
		sortInfo.resultCode = TCL_ERROR;
		goto done1;
	    }
	    elementArray[i].index.doubleValue = a;
	} else {
................................................................................
	Tcl_SetObjResult(interp, resultPtr);
    }

  done1:
    ckfree((char *)elementArray);

  done:
    if (sortMode == SORTMODE_COMMAND) {
	TclDecrRefCount(sortInfo.compareCmdPtr);
	TclDecrRefCount(listObj);
	sortInfo.compareCmdPtr = NULL;
    }
    if (sortInfo.indexc > 1) {
	ckfree((char *) sortInfo.indexv);
    }
................................................................................
{
    int order = 0;

    if (infoPtr->sortMode == SORTMODE_ASCII) {
	order = strcmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_ASCII_NC) {
	order = TclUtfCasecmp(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
	order = DictionaryCompare(elemPtr1->index.strValuePtr,
		elemPtr2->index.strValuePtr);
    } else if (infoPtr->sortMode == SORTMODE_INTEGER) {
	long a, b;

Changes to generic/tclCmdMZ.c.

3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = strcasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */







|







3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
	     * General options.
	     */

	case OPT_LAST:
	    i++;
	    goto finishedOptions;
	case OPT_NOCASE:
	    strCmpFn = TclUtfCasecmp;
	    noCase = 1;
	    break;

	    /*
	     * Handle the different switch mode options.
	     */

Changes to generic/tclInt.h.

2758
2759
2760
2761
2762
2763
2764

2765
2766
2767
2768
2769
2770
2771
MODULE_SCOPE void	TclpFinalizeThreadDataThread(void);
MODULE_SCOPE void	TclFinalizeThreadStorage(void);
#ifdef TCL_WIDE_CLICKS
MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double	TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj *	TclDisassembleByteCodeObj(Tcl_Obj *objPtr);


/*
 *----------------------------------------------------------------
 * Command procedures in the generic core:
 *----------------------------------------------------------------
 */







>







2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
MODULE_SCOPE void	TclpFinalizeThreadDataThread(void);
MODULE_SCOPE void	TclFinalizeThreadStorage(void);
#ifdef TCL_WIDE_CLICKS
MODULE_SCOPE Tcl_WideInt TclpGetWideClicks(void);
MODULE_SCOPE double	TclpWideClicksToNanoseconds(Tcl_WideInt clicks);
#endif
MODULE_SCOPE Tcl_Obj *	TclDisassembleByteCodeObj(Tcl_Obj *objPtr);
MODULE_SCOPE int TclUtfCasecmp(CONST char *cs, CONST char *ct);

/*
 *----------------------------------------------------------------
 * Command procedures in the generic core:
 *----------------------------------------------------------------
 */

Changes to generic/tclUtf.c.

1097
1098
1099
1100
1101
1102
1103



























1104
1105
1106
1107
1108
1109
1110
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}



























 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
	    if (ch1 != ch2) {
		return (ch1 - ch2);
	    }
	}
    }
    return 0;
}


/* Replacement for strcasecmp in Tcl core, in places where UTF-8 should be handled. */
int
TclUtfCasecmp(
    CONST char *cs,		/* UTF string to compare to ct. */
    CONST char *ct)		/* UTF string cs is compared to. */
{
    Tcl_UniChar ch1, ch2;
    char c;

    do {

	/* If c == '\0', loop should end. */
   	c = *cs;

	cs += TclUtfToUniChar(cs, &ch1);
	ct += TclUtfToUniChar(ct, &ch2);
	if (ch1 != ch2) {
	    ch1 = Tcl_UniCharToLower(ch1);
	    ch2 = Tcl_UniCharToLower(ch2);
	    if (ch1 != ch2) break;
	}
    } while (c);
    return (ch1 - ch2);
}

 
/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharToUpper --
 *
 *	Compute the uppercase equivalent of the given Unicode character.

Changes to tests/cmdIL.test.

390
391
392
393
394
395
396



397
398
399
400
401
402
403
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}




test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }






>
>
>







390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
} [list ! ` AA c CC]
test cmdIL-4.34 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d e c b a d35 d300 100 20}
} {100 20 a b c d d300 d35 e}
test cmdIL-4.35 {SortCompare procedure, -ascii option with -nocase option} {
    lsort -ascii -nocase {d E c B a D35 d300 100 20}
} {100 20 a B c d d300 D35 E}
test cmdIL-4.36 {SortCompare procedure, UTF-8 with -nocase option} {
    lsort -ascii -nocase [list \u101 \u100]
} [list \u101 \u100]

test cmdIL-5.1 {lsort with list style index} {
    lsort -ascii -decreasing -index {0 1} {
	{{Jim Alpha} 20000410}
	{{Joe Bravo} 19990320}
	{{Jacky Charlie} 19390911}
    }