Tcl Source Code

Check-in [30a02828fc]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Remove unicode toXXXX commands as no longer needed for confirming equivalence with string toXXXX
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | tip-726-plus
Files: files | file ages | folders
SHA3-256: 30a02828fce3100d0ed5cf6991b7b700afad9c1a01cb02025dd01e2d3df2ea5b
User & Date: apnadkarni 2025-07-25 17:55:29.201
Context
2025-07-26
05:00
Fix gcc warnings, mingw build and extraneous prototype defs used in debug check-in: ff495c9305 user: apnadkarni tags: tip-726-plus
2025-07-25
17:55
Remove unicode toXXXX commands as no longer needed for confirming equivalence with string toXXXX check-in: 30a02828fc user: apnadkarni tags: tip-726-plus
17:46
Eliminate use of tclUniData.c check-in: 1dcc7678c8 user: apnadkarni tags: tip-726-plus
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/tclCmdMZ.c.
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
	    &ds) == NULL) {
	return TCL_ERROR;
    }

    Tcl_DStringResult(interp, &ds);
    return TCL_OK;
}

static int
TclUnicodeToUpperCmd(
    TCL_UNUSED(void *),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Size length1, length2;
    const char *string1;
    char *string2;

    if (objc < 2 || objc > 4) {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
	return TCL_ERROR;
    }

    string1 = TclGetStringFromObj(objv[1], &length1);

    if (objc == 2) {
	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);

	length1 = Tcl_UtfToUpper(TclGetString(resultPtr));
	Tcl_SetObjLength(resultPtr, length1);
	Tcl_SetObjResult(interp, resultPtr);
    } else {
	Tcl_Size first, last;
	const char *start, *end;
	Tcl_Obj *resultPtr;

	length1 = Tcl_NumUtfChars(string1, length1) - 1;
	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
	    return TCL_ERROR;
	}
	if (first < 0) {
	    first = 0;
	}
	last = first;

	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
		&last) != TCL_OK)) {
	    return TCL_ERROR;
	}

	if (last >= length1) {
	    last = length1;
	}
	if (last < first) {
	    Tcl_SetObjResult(interp, objv[1]);
	    return TCL_OK;
	}

	string1 = TclGetStringFromObj(objv[1], &length1);
	start = Tcl_UtfAtIndex(string1, first);
	end = Tcl_UtfAtIndex(start, last - first + 1);
	resultPtr = Tcl_NewStringObj(string1, end - string1);
	string2 = TclGetString(resultPtr) + (start - string1);

	length2 = Tcl_UtfToUpper(string2);
	Tcl_SetObjLength(resultPtr, length2 + (start - string1));

	Tcl_AppendToObj(resultPtr, end, -1);
	Tcl_SetObjResult(interp, resultPtr);
    }

    return TCL_OK;
}

static int
TclUnicodeToLowerCmd(
    TCL_UNUSED(void *),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Size length1, length2;
    const char *string1;
    char *string2;

    if (objc < 2 || objc > 4) {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
	return TCL_ERROR;
    }

    string1 = TclGetStringFromObj(objv[1], &length1);

    if (objc == 2) {
	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);

	length1 = Tcl_UtfToLower(TclGetString(resultPtr));
	Tcl_SetObjLength(resultPtr, length1);
	Tcl_SetObjResult(interp, resultPtr);
    } else {
	Tcl_Size first, last;
	const char *start, *end;
	Tcl_Obj *resultPtr;

	length1 = Tcl_NumUtfChars(string1, length1) - 1;
	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
	    return TCL_ERROR;
	}
	if (first < 0) {
	    first = 0;
	}
	last = first;

	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
		&last) != TCL_OK)) {
	    return TCL_ERROR;
	}

	if (last >= length1) {
	    last = length1;
	}
	if (last < first) {
	    Tcl_SetObjResult(interp, objv[1]);
	    return TCL_OK;
	}

	string1 = TclGetStringFromObj(objv[1], &length1);
	start = Tcl_UtfAtIndex(string1, first);
	end = Tcl_UtfAtIndex(start, last - first + 1);
	resultPtr = Tcl_NewStringObj(string1, end - string1);
	string2 = TclGetString(resultPtr) + (start - string1);

	length2 = Tcl_UtfToLower(string2);
	Tcl_SetObjLength(resultPtr, length2 + (start - string1));

	Tcl_AppendToObj(resultPtr, end, -1);
	Tcl_SetObjResult(interp, resultPtr);
    }

    return TCL_OK;
}

static int
TclUnicodeToTitleCmd(
    TCL_UNUSED(void *),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Size length1, length2;
    const char *string1;
    char *string2;
    Tcl_Size Tcl_Utf8procToTitle(char *str);

    if (objc < 2 || objc > 4) {
	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
	return TCL_ERROR;
    }

    string1 = TclGetStringFromObj(objv[1], &length1);

    if (objc == 2) {
	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);

	length1 = Tcl_UtfToTitle(TclGetString(resultPtr));
	Tcl_SetObjLength(resultPtr, length1);
	Tcl_SetObjResult(interp, resultPtr);
    } else {
	Tcl_Size first, last;
	const char *start, *end;
	Tcl_Obj *resultPtr;

	length1 = Tcl_NumUtfChars(string1, length1) - 1;
	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
	    return TCL_ERROR;
	}
	if (first < 0) {
	    first = 0;
	}
	last = first;

	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
		&last) != TCL_OK)) {
	    return TCL_ERROR;
	}

	if (last >= length1) {
	    last = length1;
	}
	if (last < first) {
	    Tcl_SetObjResult(interp, objv[1]);
	    return TCL_OK;
	}

	string1 = TclGetStringFromObj(objv[1], &length1);
	start = Tcl_UtfAtIndex(string1, first);
	end = Tcl_UtfAtIndex(start, last - first + 1);
	resultPtr = Tcl_NewStringObj(string1, end - string1);
	string2 = TclGetString(resultPtr) + (start - string1);

	length2 = Tcl_UtfToTitle(string2);
	Tcl_SetObjLength(resultPtr, length2 + (start - string1));

	Tcl_AppendToObj(resultPtr, end, -1);
	Tcl_SetObjResult(interp, resultPtr);
    }

    return TCL_OK;
}

/*
 * TclInitUnicodeCmd --
 *
 *	This procedure creates the "unicode" Tcl ensemble command. See user
 *	documentation for details on implemented commands.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	Stores the result in the interpreter result.
 */
Tcl_Command
TclInitUnicodeCmd(
    Tcl_Interp *interp)
{
    static const EnsembleImplMap unicodeImplMap[] = {
	{"is", TclUnicodeIsCmd, NULL, NULL, NULL, 0},
	{"category", TclUnicodeCategoryCmd, NULL, NULL, NULL, 0},
	{"tolower", TclUnicodeToLowerCmd, NULL, NULL, NULL, 0},
	{"tonfc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFC, 0},
	{"tonfd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFD, 0},
	{"tonfkc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKC, 0},
	{"tonfkd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKD, 0},
	{"totitle", TclUnicodeToTitleCmd, NULL, NULL, NULL, 0},
	{"toupper", TclUnicodeToUpperCmd, NULL, NULL, NULL, 0},
	{NULL, NULL, NULL, NULL, NULL, 0}
    };
    return TclMakeEnsemble(interp, "unicode", unicodeImplMap);
}


/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * fill-column: 78
 * End:
 */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<




















<




<
<













5930
5931
5932
5933
5934
5935
5936










































































































































































































5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956

5957
5958
5959
5960


5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
	    &ds) == NULL) {
	return TCL_ERROR;
    }

    Tcl_DStringResult(interp, &ds);
    return TCL_OK;
}











































































































































































































/*
 * TclInitUnicodeCmd --
 *
 *	This procedure creates the "unicode" Tcl ensemble command. See user
 *	documentation for details on implemented commands.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	Stores the result in the interpreter result.
 */
Tcl_Command
TclInitUnicodeCmd(
    Tcl_Interp *interp)
{
    static const EnsembleImplMap unicodeImplMap[] = {
	{"is", TclUnicodeIsCmd, NULL, NULL, NULL, 0},
	{"category", TclUnicodeCategoryCmd, NULL, NULL, NULL, 0},

	{"tonfc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFC, 0},
	{"tonfd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFD, 0},
	{"tonfkc", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKC, 0},
	{"tonfkd", TclUnicodeNormalizeCmd, NULL, NULL, (void *)TCL_NFKD, 0},


	{NULL, NULL, NULL, NULL, NULL, 0}
    };
    return TclMakeEnsemble(interp, "unicode", unicodeImplMap);
}


/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * fill-column: 78
 * End:
 */
Changes to tests/unicodeProperties.test.
115
116
117
118
119
120
121

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

    foreach class {alpha alnum control digit graph lower print space upper wordchar} {
    	test string-vs-unicode-is-$class "string is $class vs unicode" -body {
            testStringUnicodeCompatibility $class
		} -result "is $class mismatches:"
    }


	proc testStringUnicodeCaseConvertCompatibility {tocase} {
        set mismatches "$tocase mismatches:"
        foreach codePoint [lseq 0 $::tcltests::ucd::maxCodepoint] {
            set ch [format %c $codePoint]
            if {[string $tocase $ch] != [unicode $tocase $ch]} {
                append mismatches " " U+[format %x $codePoint]
            break
            }
        }
        return $mismatches
    }
    foreach tocase {tolower toupper totitle} {
    	test string-vs-unicode-$tocase "string $tocase vs unicode" -body {
            testStringUnicodeCaseConvertCompatibility $tocase
		} -result "$tocase mismatches:"
    }
}

::tcltest::cleanupTests
namespace delete unicode::test
return







>














|






115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

    foreach class {alpha alnum control digit graph lower print space upper wordchar} {
    	test string-vs-unicode-is-$class "string is $class vs unicode" -body {
            testStringUnicodeCompatibility $class
		} -result "is $class mismatches:"
    }

    tcltest::testConstraint haveUnicodeToCmds [expr {![catch {unicode tolower A}]}]
	proc testStringUnicodeCaseConvertCompatibility {tocase} {
        set mismatches "$tocase mismatches:"
        foreach codePoint [lseq 0 $::tcltests::ucd::maxCodepoint] {
            set ch [format %c $codePoint]
            if {[string $tocase $ch] != [unicode $tocase $ch]} {
                append mismatches " " U+[format %x $codePoint]
            break
            }
        }
        return $mismatches
    }
    foreach tocase {tolower toupper totitle} {
    	test string-vs-unicode-$tocase "string $tocase vs unicode" -body {
            testStringUnicodeCaseConvertCompatibility $tocase
		} -constraints haveUnicodeToCmds -result "$tocase mismatches:"
    }
}

::tcltest::cleanupTests
namespace delete unicode::test
return