Tcl Source Code

Check-in [0ac59eb0c6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add 4 new encodings, and add documentation.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | utf-max
Files: files | file ages | folders
SHA3-256: 0ac59eb0c63897653ca413a858d1ab1f93b831f5c5e69a94b0af3e3668941647
User & Date: jan.nijtmans 2019-03-18 20:07:15.980
Context
2019-03-20
22:54
Merge 8.7 check-in: 3ea5d3e8a3 user: jan.nijtmans tags: utf-max
2019-03-18
20:07
Add 4 new encodings, and add documentation. check-in: 0ac59eb0c6 user: jan.nijtmans tags: utf-max
2019-03-17
22:16
For Tcl >= 8.7, always compile-in the extended Unicode tables, no matter the value of TCL_UTF_MAX. D... check-in: 82477e9d3a user: jan.nijtmans tags: core-8-branch
22:01
More WIP. Seems to be *almost* working. check-in: ab13cbd74c user: jan.nijtmans tags: utf-max
Changes
Unified Diff Ignore Whitespace Patch
Changes to doc/Utf.3.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23



24
25
26



27
28
29



30
31
32



33
34
35
36
37
38






39
40
41



42
43
44
45
46
47
48
'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
typedef ... \fBTcl_UniChar\fR;
.sp
int
\fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
.sp
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp



char *
\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp



Tcl_UniChar *
\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp



int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp



int
\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp






int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
.sp



int
\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
.sp
int










|












>
>
>



>
>
>



>
>
>



>
>
>






>
>
>
>
>
>



>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
typedef ... \fBTcl_UniChar\fR;
.sp
int
\fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
.sp
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp
int
\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR)
.sp
char *
\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
char *
\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR)
.sp
Tcl_UniChar *
\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
unsigned short *
\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR)
.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
\fBTcl_Utf16Len\fR(\fIutf16Str\fR)
.sp
int
\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_Utf16Ncmp\fR(\fIutf16s, tf16t, numChars\fR)
.sp
int
\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR)
.sp
int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
.sp
int
\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR)
.sp
int
\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
\fBTcl_UtfNcasecmp\fR(\fIcs, ct, numChars\fR)
.sp
int
76
77
78
79
80
81
82


83
84
85
86
87
88
89
90
91
92
93
94
95
96








97
98
99
100
101
102
103
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored.  At most
\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.


.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
Pointer to a UTF-8 string.
.AP "const char" *ct in
Pointer to a UTF-8 string.
.AP "const Tcl_UniChar" *uniStr in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *ucs in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uct in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.








.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters).  If
negative, all bytes up to the first null byte are used.
.AP int uniLength in
The length of the Unicode string in characters.  Must be greater than or
equal to 0.
.AP "Tcl_DString" *dsPtr in/out







>
>














>
>
>
>
>
>
>
>







97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored.  At most
\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
.AP unsigned short *utf16Ptr out
Filled with the utf-16 represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
Pointer to a UTF-8 string.
.AP "const char" *ct in
Pointer to a UTF-8 string.
.AP "const Tcl_UniChar" *uniStr in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *ucs in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uct in
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
.AP "const unsigned short" *utf16Str in
A null-terminated utf-16 string.
.AP "const unsigned short" *utf16s in
A null-terminated utf-16 string.
.AP "const unsigned short" *utf16t in
A null-terminated utf-16 string.
.AP "const unsigned short" *utf16Pattern in
A null-terminated utf-16 string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters).  If
negative, all bytes up to the first null byte are used.
.AP int uniLength in
The length of the Unicode string in characters.  Must be greater than or
equal to 0.
.AP "Tcl_DString" *dsPtr in/out
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138

139
140
141
142
143
144
145
.AP int nocase in
Specifies whether the match should be done case-sensitive (0) or
case-insensitive (1).
.BE

.SH DESCRIPTION
.PP
These routines convert between UTF-8 strings and Unicode characters.  An
Unicode character represented as an unsigned, fixed-size
quantity.  A UTF-8 character is a Unicode character represented as
a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes.  A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in starting at \fIbuf\fR.  The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
the return value will be 0 and nothing will be stored. If you still
want to produce UTF-8 output for it (even though knowing it's an illegal
code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.

.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
number of bytes read from \fIsrc\fR.  The caller must ensure that the
source buffer is long enough such that this routine does not run off the
end and dereference non-existent or random memory; if the source buffer
is known to be null-terminated, this will not happen.  If the input is







|
|










|
|
|
>







148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
.AP int nocase in
Specifies whether the match should be done case-sensitive (0) or
case-insensitive (1).
.BE

.SH DESCRIPTION
.PP
These routines convert between UTF-8 strings and Unicode/Utf-16 characters.
An Unicode character represented as an unsigned, fixed-size
quantity.  A UTF-8 character is a Unicode character represented as
a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes.  A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
.PP
\fBTCL_UTF_MAX\fR is the maximum number of bytes that it takes to
represent one Unicode character in the UTF-8 representation.
.PP
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
in starting at \fIbuf\fR.  The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
the return value will be 1 and a single byte in the range 0xF0 - 0xF4
will be stored. If you still want to produce UTF-8 output for it (even
though knowing it's an illegal code-point on its own), just call
\fBTcl_UniCharToUtf\fR again specifying ch = -1.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR.  The return value is the
number of bytes read from \fIsrc\fR.  The caller must ensure that the
source buffer is long enough such that this routine does not run off the
end and dereference non-existent or random memory; if the source buffer
is known to be null-terminated, this will not happen.  If the input is
183
184
185
186
187
188
189





190
191
192
193
194
195
196
is the Unicode case insensitive version.
.PP
\fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to
\fBTcl_StringCaseMatch\fR.  It accepts a null-terminated Unicode string,
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
.PP





\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
to compare.  (Both strings are assumed to be at least \fInumChars\fR
characters long.)  \fBTcl_UtfNcmp\fR compares the two strings
character-by-character according to the Unicode character ordering.
It returns an integer greater than, equal to, or less than 0 if the
first string is greater than, equal to, or less than the second string







>
>
>
>
>







215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
is the Unicode case insensitive version.
.PP
\fBTcl_UniCharCaseMatch\fR is the Unicode equivalent to
\fBTcl_StringCaseMatch\fR.  It accepts a null-terminated Unicode string,
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
.PP
\fBTcl_Utf16CaseMatch\fR is the utf-16 equivalent to
\fBTcl_StringCaseMatch\fR.  It accepts a null-terminated utf-16 string,
a utf-16 pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
to compare.  (Both strings are assumed to be at least \fInumChars\fR
characters long.)  \fBTcl_UtfNcmp\fR compares the two strings
character-by-character according to the Unicode character ordering.
It returns an integer greater than, equal to, or less than 0 if the
first string is greater than, equal to, or less than the second string
Changes to generic/regc_locale.c.
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
    NOTE(REG_ULOCALE);

    /*
     * Search table.
     */

    Tcl_DStringInit(&ds);
    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
    for (cn=cnames; cn->name!=NULL; cn++) {
	if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
	    break;			/* NOTE BREAK OUT */
	}
    }
    Tcl_DStringFree(&ds);
    if (cn->name != NULL) {







|







829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
    NOTE(REG_ULOCALE);

    /*
     * Search table.
     */

    Tcl_DStringInit(&ds);
    np = Tcl_UniCharToUtfDString(startp, len, &ds);
    for (cn=cnames; cn->name!=NULL; cn++) {
	if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
	    break;			/* NOTE BREAK OUT */
	}
    }
    Tcl_DStringFree(&ds);
    if (cn->name != NULL) {
Changes to generic/tcl.decls.
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386

2387
2388
2389
2390
2391
2392
2393

2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412


2413

2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428

declare 643 {
    int Tcl_IsShared(Tcl_Obj *objPtr)
}

# TIP #???
declare 644 {
    int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
declare 645 {

    Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars)
}
declare 646 {
    int Tcl_UtfToUniChar(const char *src, int *chPtr)
}
declare 647 {
    int Tcl_UniCharLen(const int *uniStr)

}
declare 648 {
    int Tcl_UniCharNcmp(const int *ucs, const int *uct,
	    unsigned long numChars)
}
declare 649 {
    int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
	    unsigned long numChars)
}
declare 650 {
    char *Tcl_UniCharToUtfDString(const int *uniStr,
	    int uniLength, Tcl_DString *dsPtr)
}
declare 651 {
    int *Tcl_UtfToUniCharDString(const char *src,
	    int length, Tcl_DString *dsPtr)
}
declare 652 {
    int Tcl_UniCharCaseMatch(const int *uniStr,


	    const int *uniPattern, int nocase)

}
declare 653 {
    void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode,
	    int length)
}
declare 654 {
    void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode,
	    int numChars)
}


# ----- BASELINE -- FOR -- 8.7.0 ----- #

##############################################################################








|


>
|


|


|
>


|
<


<
<
<
<



|



|
|
>
>
|
>


|
|


|
|







2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398

2399
2400




2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428

declare 643 {
    int Tcl_IsShared(Tcl_Obj *objPtr)
}

# TIP #???
declare 644 {
    Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars)
}
declare 645 {
    void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode,
	    int numChars)
}
declare 646 {
    int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
declare 647 {
    void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode,
	    int length)
}
declare 648 {
    int Tcl_UtfToUniChar(const char *src, int *chPtr)

}
declare 649 {




    char *Tcl_UniCharToUtfDString(const int *uniStr,
	    int uniLength, Tcl_DString *dsPtr)
}
declare 650 {
    int *Tcl_UtfToUniCharDString(const char *src,
	    int length, Tcl_DString *dsPtr)
}
declare 651 {
    int Tcl_UniCharLen(const int *uniStr)
}
declare 652 {
    int Tcl_UniCharNcmp(const int *ucs, const int *uct,
	    unsigned long numChars)
}
declare 653 {
    int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
	    unsigned long numChars)
}
declare 654 {
    int Tcl_UniCharCaseMatch(const int *uniStr,
	    const int *uniPattern, int nocase)
}


# ----- BASELINE -- FOR -- 8.7.0 ----- #

##############################################################################

Changes to generic/tcl.h.
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
/*
 * This represents a Unicode character. Any changes to this should also be
 * reflected in regcustom.h.
 */

#if TCL_UTF_MAX > 3
    /*
     * unsigned int isn't 100% accurate as it should be a strict 4-byte value
     * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
     * value must be reflected correctly in regcustom.h and
     * in tclEncoding.c.
     * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode
     * XXX: string rep that Tcl_UniChar represents.  Changing the size
     * XXX: of Tcl_UniChar is /not/ supported.
     */
typedef int Tcl_UniChar;
#else
typedef unsigned short Tcl_UniChar;
#endif

/*







|
|
|
<
<
<
<







2148
2149
2150
2151
2152
2153
2154
2155
2156
2157




2158
2159
2160
2161
2162
2163
2164
/*
 * This represents a Unicode character. Any changes to this should also be
 * reflected in regcustom.h.
 */

#if TCL_UTF_MAX > 3
    /*
     * int isn't 100% accurate as it should be a strict 4-byte value
     * (perhaps wchar_t). ILP64 systems may have troubles. The size of this
     * value must be reflected correctly in regcustom.h.




     */
typedef int Tcl_UniChar;
#else
typedef unsigned short Tcl_UniChar;
#endif

/*
Changes to generic/tclBinary.c.
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[5] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }







|







1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[5] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }







|







1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
 badIndex:
    errorString = "not enough arguments for all format specifiers";
    goto error;

 badField:
    {
	Tcl_UniChar ch = 0;
	char buf[TCL_UTF_MAX + 1] = "";

	TclUtfToUniChar(errorString, &ch);
	buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		"bad field specifier \"%s\"", buf));
	return TCL_ERROR;
    }
Changes to generic/tclDecls.h.
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902

1903
1904
1905

1906

1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919


1920
1921
1922
1923
1924
1925

1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
/* 641 */
EXTERN void		Tcl_IncrRefCount(Tcl_Obj *objPtr);
/* 642 */
EXTERN void		Tcl_DecrRefCount(Tcl_Obj *objPtr);
/* 643 */
EXTERN int		Tcl_IsShared(Tcl_Obj *objPtr);
/* 644 */
EXTERN int *		Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
				int *lengthPtr);
/* 645 */

EXTERN Tcl_Obj *	Tcl_NewUnicodeObj(const int *unicode, int numChars);
/* 646 */
EXTERN int		Tcl_UtfToUniChar(const char *src, int *chPtr);

/* 647 */

EXTERN int		Tcl_UniCharLen(const int *uniStr);
/* 648 */
EXTERN int		Tcl_UniCharNcmp(const int *ucs, const int *uct,
				unsigned long numChars);
/* 649 */
EXTERN int		Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
				unsigned long numChars);
/* 650 */
EXTERN char *		Tcl_UniCharToUtfDString(const int *uniStr,
				int uniLength, Tcl_DString *dsPtr);
/* 651 */
EXTERN int *		Tcl_UtfToUniCharDString(const char *src, int length,
				Tcl_DString *dsPtr);


/* 652 */
EXTERN int		Tcl_UniCharCaseMatch(const int *uniStr,
				const int *uniPattern, int nocase);
/* 653 */
EXTERN void		Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
				const int *unicode, int length);

/* 654 */
EXTERN void		Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
				const int *unicode, int numChars);

typedef struct {
    const struct TclPlatStubs *tclPlatStubs;
    const struct TclIntStubs *tclIntStubs;
    const struct TclIntPlatStubs *tclIntPlatStubs;
} TclStubHooks;








|
<

>
|

|
>

>
|

|
<

<
<
<


|


>
>

|
|

<
|
>

|
|







1893
1894
1895
1896
1897
1898
1899
1900

1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911

1912



1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923

1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
/* 641 */
EXTERN void		Tcl_IncrRefCount(Tcl_Obj *objPtr);
/* 642 */
EXTERN void		Tcl_DecrRefCount(Tcl_Obj *objPtr);
/* 643 */
EXTERN int		Tcl_IsShared(Tcl_Obj *objPtr);
/* 644 */
EXTERN Tcl_Obj *	Tcl_NewUnicodeObj(const int *unicode, int numChars);

/* 645 */
EXTERN void		Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
				const int *unicode, int numChars);
/* 646 */
EXTERN int *		Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
				int *lengthPtr);
/* 647 */
EXTERN void		Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
				const int *unicode, int length);
/* 648 */
EXTERN int		Tcl_UtfToUniChar(const char *src, int *chPtr);

/* 649 */



EXTERN char *		Tcl_UniCharToUtfDString(const int *uniStr,
				int uniLength, Tcl_DString *dsPtr);
/* 650 */
EXTERN int *		Tcl_UtfToUniCharDString(const char *src, int length,
				Tcl_DString *dsPtr);
/* 651 */
EXTERN int		Tcl_UniCharLen(const int *uniStr);
/* 652 */
EXTERN int		Tcl_UniCharNcmp(const int *ucs, const int *uct,
				unsigned long numChars);
/* 653 */

EXTERN int		Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
				unsigned long numChars);
/* 654 */
EXTERN int		Tcl_UniCharCaseMatch(const int *uniStr,
				const int *uniPattern, int nocase);

typedef struct {
    const struct TclPlatStubs *tclPlatStubs;
    const struct TclIntStubs *tclIntStubs;
    const struct TclIntPlatStubs *tclIntPlatStubs;
} TclStubHooks;

2601
2602
2603
2604
2605
2606
2607


2608
2609
2610


2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
    char * (*tcl_InitStringRep) (Tcl_Obj *objPtr, const char *bytes, unsigned int numBytes); /* 637 */
    Tcl_ObjIntRep * (*tcl_FetchIntRep) (Tcl_Obj *objPtr, const Tcl_ObjType *typePtr); /* 638 */
    void (*tcl_StoreIntRep) (Tcl_Obj *objPtr, const Tcl_ObjType *typePtr, const Tcl_ObjIntRep *irPtr); /* 639 */
    int (*tcl_HasStringRep) (Tcl_Obj *objPtr); /* 640 */
    void (*tcl_IncrRefCount) (Tcl_Obj *objPtr); /* 641 */
    void (*tcl_DecrRefCount) (Tcl_Obj *objPtr); /* 642 */
    int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */


    int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 644 */
    Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 645 */
    int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */


    int (*tcl_UniCharLen) (const int *uniStr); /* 647 */
    int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 648 */
    int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 649 */
    char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 650 */
    int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 651 */
    int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 652 */
    void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 653 */
    void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 654 */
} TclStubs;

extern const TclStubs *tclStubsPtr;

#ifdef __cplusplus
}
#endif







>
>
|
|
|
>
>
|
|
|
<
<
|
<
<







2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617


2618


2619
2620
2621
2622
2623
2624
2625
    char * (*tcl_InitStringRep) (Tcl_Obj *objPtr, const char *bytes, unsigned int numBytes); /* 637 */
    Tcl_ObjIntRep * (*tcl_FetchIntRep) (Tcl_Obj *objPtr, const Tcl_ObjType *typePtr); /* 638 */
    void (*tcl_StoreIntRep) (Tcl_Obj *objPtr, const Tcl_ObjType *typePtr, const Tcl_ObjIntRep *irPtr); /* 639 */
    int (*tcl_HasStringRep) (Tcl_Obj *objPtr); /* 640 */
    void (*tcl_IncrRefCount) (Tcl_Obj *objPtr); /* 641 */
    void (*tcl_DecrRefCount) (Tcl_Obj *objPtr); /* 642 */
    int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */
    Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 644 */
    void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 645 */
    int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 646 */
    void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 647 */
    int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 648 */
    char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 649 */
    int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 650 */
    int (*tcl_UniCharLen) (const int *uniStr); /* 651 */
    int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 652 */
    int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 653 */


    int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 654 */


} TclStubs;

extern const TclStubs *tclStubsPtr;

#ifdef __cplusplus
}
#endif
3930
3931
3932
3933
3934
3935
3936




3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
	(tclStubsPtr->tcl_HasStringRep) /* 640 */
#define Tcl_IncrRefCount \
	(tclStubsPtr->tcl_IncrRefCount) /* 641 */
#define Tcl_DecrRefCount \
	(tclStubsPtr->tcl_DecrRefCount) /* 642 */
#define Tcl_IsShared \
	(tclStubsPtr->tcl_IsShared) /* 643 */




#define Tcl_GetUnicodeFromObj \
	(tclStubsPtr->tcl_GetUnicodeFromObj) /* 644 */
#define Tcl_NewUnicodeObj \
	(tclStubsPtr->tcl_NewUnicodeObj) /* 645 */
#define Tcl_UtfToUniChar \
	(tclStubsPtr->tcl_UtfToUniChar) /* 646 */
#define Tcl_UniCharLen \
	(tclStubsPtr->tcl_UniCharLen) /* 647 */
#define Tcl_UniCharNcmp \
	(tclStubsPtr->tcl_UniCharNcmp) /* 648 */
#define Tcl_UniCharNcasecmp \
	(tclStubsPtr->tcl_UniCharNcasecmp) /* 649 */
#define Tcl_UniCharToUtfDString \
	(tclStubsPtr->tcl_UniCharToUtfDString) /* 650 */
#define Tcl_UtfToUniCharDString \
	(tclStubsPtr->tcl_UtfToUniCharDString) /* 651 */
#define Tcl_UniCharCaseMatch \
	(tclStubsPtr->tcl_UniCharCaseMatch) /* 652 */
#define Tcl_AppendUnicodeToObj \
	(tclStubsPtr->tcl_AppendUnicodeToObj) /* 653 */
#define Tcl_SetUnicodeObj \
	(tclStubsPtr->tcl_SetUnicodeObj) /* 654 */

#endif /* defined(USE_TCL_STUBS) */

/* !END!: Do not edit above this line. */

#if defined(USE_TCL_STUBS)
#   undef Tcl_CreateInterp







>
>
>
>

|
|
|

|
|
|
|
|
|
|
|
|
|
|

|
<
<
<
<







3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958




3959
3960
3961
3962
3963
3964
3965
	(tclStubsPtr->tcl_HasStringRep) /* 640 */
#define Tcl_IncrRefCount \
	(tclStubsPtr->tcl_IncrRefCount) /* 641 */
#define Tcl_DecrRefCount \
	(tclStubsPtr->tcl_DecrRefCount) /* 642 */
#define Tcl_IsShared \
	(tclStubsPtr->tcl_IsShared) /* 643 */
#define Tcl_NewUnicodeObj \
	(tclStubsPtr->tcl_NewUnicodeObj) /* 644 */
#define Tcl_SetUnicodeObj \
	(tclStubsPtr->tcl_SetUnicodeObj) /* 645 */
#define Tcl_GetUnicodeFromObj \
	(tclStubsPtr->tcl_GetUnicodeFromObj) /* 646 */
#define Tcl_AppendUnicodeToObj \
	(tclStubsPtr->tcl_AppendUnicodeToObj) /* 647 */
#define Tcl_UtfToUniChar \
	(tclStubsPtr->tcl_UtfToUniChar) /* 648 */
#define Tcl_UniCharToUtfDString \
	(tclStubsPtr->tcl_UniCharToUtfDString) /* 649 */
#define Tcl_UtfToUniCharDString \
	(tclStubsPtr->tcl_UtfToUniCharDString) /* 650 */
#define Tcl_UniCharLen \
	(tclStubsPtr->tcl_UniCharLen) /* 651 */
#define Tcl_UniCharNcmp \
	(tclStubsPtr->tcl_UniCharNcmp) /* 652 */
#define Tcl_UniCharNcasecmp \
	(tclStubsPtr->tcl_UniCharNcasecmp) /* 653 */
#define Tcl_UniCharCaseMatch \
	(tclStubsPtr->tcl_UniCharCaseMatch) /* 654 */





#endif /* defined(USE_TCL_STUBS) */

/* !END!: Do not edit above this line. */

#if defined(USE_TCL_STUBS)
#   undef Tcl_CreateInterp
Changes to generic/tclEncoding.c.
565
566
567
568
569
570
571




572
573
574
575
576

577
578
579
580
581
582
583
void
TclInitEncodingSubsystem(void)
{
    Tcl_EncodingType type;
    TableEncodingData *dataPtr;
    unsigned size;
    unsigned short i;





    if (encodingsInitialized) {
	return;
    }


    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings. Note that the UTF-8 to UTF-8
     * translation is not a no-op, because it will turn a stream of improperly







>
>
>
>





>







565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
void
TclInitEncodingSubsystem(void)
{
    Tcl_EncodingType type;
    TableEncodingData *dataPtr;
    unsigned size;
    unsigned short i;
    union {
        char c;
        short s;
    } isLe;

    if (encodingsInitialized) {
	return;
    }

    isLe.s = 1;
    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings. Note that the UTF-8 to UTF-8
     * translation is not a no-op, because it will turn a stream of improperly
596
597
598
599
600
601
602
603
604
605
606
607

608
609
610


611



612
613
614
615







616
617
618
619
620
621
622
623
    type.toUtfProc	= UtfExtToUtfIntProc;
    type.fromUtfProc	= UtfIntToUtfExtProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

    type.encodingName   = "ucs-2";
    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUcs2Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;

    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);



    type.encodingName   = "utf-16";



    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUtf16Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;







    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

#ifndef TCL_NO_DEPRECATED
    type.encodingName   = "unicode";
    Tcl_CreateEncoding(&type);
#endif








<




>
|

|
>
>
|
>
>
>




>
>
>
>
>
>
>
|







601
602
603
604
605
606
607

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
    type.toUtfProc	= UtfExtToUtfIntProc;
    type.fromUtfProc	= UtfIntToUtfExtProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);


    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUcs2Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "ucs-2le";
    type.clientData	= INT2PTR(1);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUtf16Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "utf-16le";
    type.clientData	= INT2PTR(1);;
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16";
    type.clientData	= INT2PTR(isLe.c);;
    Tcl_CreateEncoding(&type);

#ifndef TCL_NO_DEPRECATED
    type.encodingName   = "unicode";
    Tcl_CreateEncoding(&type);
#endif

2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
Utf16ToUtfProc(
    ClientData clientData,	/* Not used. */
    const char *src,		/* Source string in Unicode. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion







|







2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
Utf16ToUtfProc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
    const char *src,		/* Source string in Unicode. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
2482
2483
2484
2485
2486
2487
2488





2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}






	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.
	 */

	ch = *(unsigned short *)src;
	if (ch && ch < 0x80) {
	    *dst++ = (ch & 0xFF);
	} else {
	    dst += Tcl_UniCharToUtf(ch, dst);
	}
	src += sizeof(unsigned short);
    }







>
>
>
>
>




<
<







2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514


2515
2516
2517
2518
2519
2520
2521

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	if (clientData) {
	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
	} else {
	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
	}
	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.
	 */


	if (ch && ch < 0x80) {
	    *dst++ = (ch & 0xFF);
	} else {
	    dst += Tcl_UniCharToUtf(ch, dst);
	}
	src += sizeof(unsigned short);
    }
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUtf16Proc(
    ClientData clientData,	/* TableEncodingData that specifies
				 * encoding. */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion







|
<







2540
2541
2542
2543
2544
2545
2546
2547

2548
2549
2550
2551
2552
2553
2554
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUtf16Proc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */

    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601


2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612


2613
2614
2615
2616
2617
2618
2619
2620
2621
2622

2623
2624
2625
2626
2627
2628
2629
	src += TclUtfToUniChar(src, chPtr);

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

#ifdef WORDS_BIGENDIAN
#if TCL_UTF_MAX > 3
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr >> 8);
	    *dst++ = (*chPtr & 0xFF);
	} else {
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);


	}
#else
	*dst++ = (*chPtr >> 8);
	*dst++ = (*chPtr & 0xFF);
#endif
#else
#if TCL_UTF_MAX > 3
	if (*chPtr <= 0xFFFF) {
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
	} else {


	    *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
	    *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	}
#else
	*dst++ = (*chPtr & 0xFF);
	*dst++ = (*chPtr >> 8);
#endif
#endif

    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}








|

|
|
|
|
<
<
|
|
>
>
|

|
|

|

|
|
|
|
>
>
|
|
<
<
|

|
|

<
>







2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616


2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635


2636
2637
2638
2639
2640

2641
2642
2643
2644
2645
2646
2647
2648
	src += TclUtfToUniChar(src, chPtr);

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
#if TCL_UTF_MAX > 3
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (*chPtr >> 8);
	    } else {


		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
	    }
#else
	    *dst++ = (*chPtr & 0xFF);
	    *dst++ = (*chPtr >> 8);
#endif
	} else {
#if TCL_UTF_MAX > 3
	    if (*chPtr <= 0xFFFF) {
		*dst++ = (*chPtr >> 8);
		*dst++ = (*chPtr & 0xFF);
	    } else {
		*dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
		*dst++ = (*chPtr & 0xFF);
		*dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
		*dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);


	    }
#else
	    *dst++ = (*chPtr >> 8);
	    *dst++ = (*chPtr & 0xFF);
#endif

	}
    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}

2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUcs2Proc(
    ClientData clientData,	/* TableEncodingData that specifies
				 * encoding. */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion







|
<







2660
2661
2662
2663
2664
2665
2666
2667

2668
2669
2670
2671
2672
2673
2674
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUcs2Proc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */

    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730

2731
2732
2733
2734
2735
2736
2737
#endif

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

#ifdef WORDS_BIGENDIAN
	*dst++ = (ch >> 8);
	*dst++ = (ch & 0xFF);
#else
	*dst++ = (ch & 0xFF);
	*dst++ = (ch >> 8);
#endif

    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}








|
|
|
|
|
|
<
>







2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747

2748
2749
2750
2751
2752
2753
2754
2755
#endif

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
	    *dst++ = (ch & 0xFF);
	    *dst++ = (ch >> 8);
	} else {
	    *dst++ = (ch >> 8);
	    *dst++ = (ch & 0xFF);

	}
    }
    *srcReadPtr = src - srcStart;
    *dstWrotePtr = dst - dstStart;
    *dstCharsPtr = numChars;
    return result;
}

Changes to generic/tclScan.c.
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags;
    char *end;
    Tcl_UniChar ch = 0;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[5] = "";
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
				 * we're inside there! */

    /*
     * Initialize an array that records the number of times a variable is







|







257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
				 * required. */
{
    int gotXpg, gotSequential, value, i, flags;
    char *end;
    Tcl_UniChar ch = 0;
    int objIndex, xpgSize, nspace = numVars;
    int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
    char buf[TCL_UTF_MAX + 1] = "";
    Tcl_Obj *errorMsg;		/* Place to build an error messages. Note that
				 * these are messy operations because we do
				 * not want to use the formatting engine;
				 * we're inside there! */

    /*
     * Initialize an array that records the number of times a variable is
Changes to generic/tclStubInit.c.
76
77
78
79
80
81
82
83
84
85
86
87
88
89







90

91

92
93
94
95
96
97
98
    Tcl_Panic("This extension is compiled with -DTCL_UTF_MAX>3, but Tcl is compiled with -DTCL_UTF_MAX==3");
#else
    Tcl_Panic("This extension is compiled with -DTCL_UTF_MAX==3, but Tcl is compiled with -DTCL_UTF_MAX>3");
#endif
}

#if TCL_UTF_MAX == 3
#ifdef TCL_NO_DEPRECATED
#   define Tcl_GetUnicode 0
#endif
#   define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, int *)) uniCodePanic
#   define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int)) uniCodePanic
#   define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *,const int *, int)) uniCodePanic
#   define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const int *, int)) uniCodePanic







#else

#   define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic

#   define Tcl_GetUtf16FromObj (unsigned short *(*)(Tcl_Obj *, int *)) uniCodePanic
#   define Tcl_NewUtf16Obj (Tcl_Obj *(*)(const unsigned short *, int)) uniCodePanic
#   define Tcl_SetUtf16Obj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic
#   define Tcl_AppendUtf16ToObj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic
#endif

/* See bug 510001: TclSockMinimumBuffers needs plat imp */







<
<
<


|
|
>
>
>
>
>
>
>

>
|
>







76
77
78
79
80
81
82



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    Tcl_Panic("This extension is compiled with -DTCL_UTF_MAX>3, but Tcl is compiled with -DTCL_UTF_MAX==3");
#else
    Tcl_Panic("This extension is compiled with -DTCL_UTF_MAX==3, but Tcl is compiled with -DTCL_UTF_MAX>3");
#endif
}

#if TCL_UTF_MAX == 3



#   define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, int *)) uniCodePanic
#   define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int)) uniCodePanic
#   define Tcl_SetUnicodeObj (void (*)(Tcl_Obj *,const int *, int)) uniCodePanic
#   define Tcl_AppendUnicodeToObj (void (*)(Tcl_Obj *, const int *, int)) uniCodePanic
#   define Tcl_UtfToUniChar (int (*)(const char *, int *)) uniCodePanic
#   define Tcl_UniCharToUtfDString (char *(*)(const int *, int, Tcl_DString *)) uniCodePanic
#   define Tcl_UtfToUniCharDString (int *(*)(const char *, int, Tcl_DString *)) uniCodePanic
#   define Tcl_UniCharCaseMatch (int (*)(const int *, const int *, int)) uniCodePanic
#   define Tcl_UniCharLen (int (*)(const int *)) uniCodePanic
#   define Tcl_UniCharNcmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic
#   define Tcl_UniCharNcasecmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic
#else
#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
#	define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic
#   endif
#   define Tcl_GetUtf16FromObj (unsigned short *(*)(Tcl_Obj *, int *)) uniCodePanic
#   define Tcl_NewUtf16Obj (Tcl_Obj *(*)(const unsigned short *, int)) uniCodePanic
#   define Tcl_SetUtf16Obj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic
#   define Tcl_AppendUtf16ToObj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic
#endif

/* See bug 510001: TclSockMinimumBuffers needs plat imp */
133
134
135
136
137
138
139


140
141
142
143
144
145
146
#   define Tcl_ListMathFuncs 0
#   define Tcl_SetIntObj 0
#   define Tcl_SetLongObj 0
#   define Tcl_NewIntObj 0
#   define Tcl_NewLongObj 0
#   define Tcl_DbNewLongObj 0
#   define Tcl_BackgroundError 0


#else
#define TclBNInitBignumFromLong initBignumFromLong
static void TclBNInitBignumFromLong(mp_int *a, long b)
{
    TclInitBignumFromWideInt(a, b);
}
#define TclSetStartupScriptPath setStartupScriptPath







>
>







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#   define Tcl_ListMathFuncs 0
#   define Tcl_SetIntObj 0
#   define Tcl_SetLongObj 0
#   define Tcl_NewIntObj 0
#   define Tcl_NewLongObj 0
#   define Tcl_DbNewLongObj 0
#   define Tcl_BackgroundError 0
#   define Tcl_GetUnicode 0

#else
#define TclBNInitBignumFromLong initBignumFromLong
static void TclBNInitBignumFromLong(mp_int *a, long b)
{
    TclInitBignumFromWideInt(a, b);
}
#define TclSetStartupScriptPath setStartupScriptPath
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355





356
357
358
359










360
361
362
363
364
365
366
		    "integer value too large to represent as non-long integer", -1));
	    result = TCL_ERROR;
	}
    }
    return result;
}
#define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj
static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
   return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcmp
static int utfNcmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcmp (int(*)(const char*,const char*,unsigned long))utfNcmp
static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))utfNcasecmp





static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
   return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcasecmp











#endif /* TCL_WIDE_INT_IS_LONG */

#endif /* __CYGWIN__ */

#if defined(TCL_NO_DEPRECATED)
#   define Tcl_SeekOld 0







<
<
<
<








>
>
>
>
>
|


|
>
>
>
>
>
>
>
>
>
>







345
346
347
348
349
350
351




352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
		    "integer value too large to represent as non-long integer", -1));
	    result = TCL_ERROR;
	}
    }
    return result;
}
#define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj




static int utfNcmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcmp (int(*)(const char*,const char*,unsigned long))utfNcmp
static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
   return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))utfNcasecmp
#if TCL_UTF_MAX > 3
static int uniCharNcmp(const int *ucs, const int *uct, unsigned int n){
   return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcmp (int(*)(const int*,const int*,unsigned long))uniCharNcmp
static int uniCharNcasecmp(const int *ucs, const int *uct, unsigned int n){
   return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
}
#define Tcl_UniCharNcasecmp (int(*)(const int*,const int*,unsigned long))uniCharNcasecmp
#else
static int utf16Ncmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){
   return Tcl_Utf16Ncmp(ucs, uct, (unsigned long)n);
}
#define Tcl_Utf16Ncmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncmp
static int utf16Ncasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){
   return Tcl_Utf16Ncasecmp(ucs, uct, (unsigned long)n);
}
#define Tcl_Utf16Ncasecmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncasecmp
#endif

#endif /* TCL_WIDE_INT_IS_LONG */

#endif /* __CYGWIN__ */

#if defined(TCL_NO_DEPRECATED)
#   define Tcl_SeekOld 0
1655
1656
1657
1658
1659
1660
1661


1662
1663
1664


1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
    Tcl_InitStringRep, /* 637 */
    Tcl_FetchIntRep, /* 638 */
    Tcl_StoreIntRep, /* 639 */
    Tcl_HasStringRep, /* 640 */
    Tcl_IncrRefCount, /* 641 */
    Tcl_DecrRefCount, /* 642 */
    Tcl_IsShared, /* 643 */


    Tcl_GetUnicodeFromObj, /* 644 */
    Tcl_NewUnicodeObj, /* 645 */
    Tcl_UtfToUniChar, /* 646 */


    Tcl_UniCharLen, /* 647 */
    Tcl_UniCharNcmp, /* 648 */
    Tcl_UniCharNcasecmp, /* 649 */
    Tcl_UniCharToUtfDString, /* 650 */
    Tcl_UtfToUniCharDString, /* 651 */
    Tcl_UniCharCaseMatch, /* 652 */
    Tcl_AppendUnicodeToObj, /* 653 */
    Tcl_SetUnicodeObj, /* 654 */
};

/* !END!: Do not edit above this line. */







>
>
|
|
|
>
>
|
|
|
<
<
|
<
<



1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690


1691


1692
1693
1694
    Tcl_InitStringRep, /* 637 */
    Tcl_FetchIntRep, /* 638 */
    Tcl_StoreIntRep, /* 639 */
    Tcl_HasStringRep, /* 640 */
    Tcl_IncrRefCount, /* 641 */
    Tcl_DecrRefCount, /* 642 */
    Tcl_IsShared, /* 643 */
    Tcl_NewUnicodeObj, /* 644 */
    Tcl_SetUnicodeObj, /* 645 */
    Tcl_GetUnicodeFromObj, /* 646 */
    Tcl_AppendUnicodeToObj, /* 647 */
    Tcl_UtfToUniChar, /* 648 */
    Tcl_UniCharToUtfDString, /* 649 */
    Tcl_UtfToUniCharDString, /* 650 */
    Tcl_UniCharLen, /* 651 */
    Tcl_UniCharNcmp, /* 652 */
    Tcl_UniCharNcasecmp, /* 653 */


    Tcl_UniCharCaseMatch, /* 654 */


};

/* !END!: Do not edit above this line. */
Changes to generic/tclUtf.c.
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

#undef Tcl_UniCharToUtfDString
char *
Tcl_UniCharToUtfDString(
    const int *uniStr,	/* Unicode string to convert to UTF-8. */
    int uniLength,		/* Length of Unicode string (must be >= 0). */
    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
				 * to this previously initialized DString. */
{







|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
char *
Tcl_UniCharToUtfDString(
    const int *uniStr,	/* Unicode string to convert to UTF-8. */
    int uniLength,		/* Length of Unicode string (must be >= 0). */
    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
				 * to this previously initialized DString. */
{
249
250
251
252
253
254
255

256
257
258
259
260
261
262
	p += Tcl_UniCharToUtf(*w, p);
	w++;
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}


char *
Tcl_Utf16ToUtfDString(
    const unsigned short *uniStr,	/* Utf-16 string to convert to UTF-8. */
    int uniLength,		/* Length of Utf-16 string (must be >= 0). */
    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
				 * to this previously initialized DString. */







>







249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
	p += Tcl_UniCharToUtf(*w, p);
	w++;
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}
#endif /* TCL_UTF_MAX > 3 */

char *
Tcl_Utf16ToUtfDString(
    const unsigned short *uniStr,	/* Utf-16 string to convert to UTF-8. */
    int uniLength,		/* Length of Utf-16 string (must be >= 0). */
    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
				 * to this previously initialized DString. */
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
static const unsigned short cp1252[32] = {
  0x20ac,   0x81, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152,   0x8D, 0x017D,   0x8F,
    0x90, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
   0x2DC, 0x2122, 0x0161, 0x203A, 0x0153,   0x9D, 0x017E, 0x0178
};

#undef Tcl_UtfToUniChar
int
Tcl_UtfToUniChar(
    const char *src,	/* The UTF-8 string. */
    int *chPtr)/* Filled with the Unicode represented by
				 * the UTF-8 string. */
{
    int byte;







|







334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
static const unsigned short cp1252[32] = {
  0x20ac,   0x81, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152,   0x8D, 0x017D,   0x8F,
    0x90, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
   0x2DC, 0x2122, 0x0161, 0x203A, 0x0153,   0x9D, 0x017E, 0x0178
};

#if TCL_UTF_MAX > 3
int
Tcl_UtfToUniChar(
    const char *src,	/* The UTF-8 string. */
    int *chPtr)/* Filled with the Unicode represented by
				 * the UTF-8 string. */
{
    int byte;
417
418
419
420
421
422
423

424
425
426
427
428
429
430
	 * represents itself.
	 */
    }

    *chPtr = byte;
    return 1;
}


int
Tcl_UtfToUtf16(
    const char *src,	/* The UTF-8 string. */
    unsigned short *chPtr)/* Filled with the Utf-16 representation of
				 * the UTF-8 string. */
{







>







418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
	 * represents itself.
	 */
    }

    *chPtr = byte;
    return 1;
}
#endif /* TCL_UTF_MAX > 3 */

int
Tcl_UtfToUtf16(
    const char *src,	/* The UTF-8 string. */
    unsigned short *chPtr)/* Filled with the Utf-16 representation of
				 * the UTF-8 string. */
{
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

#undef Tcl_UtfToUniCharDString
int *
Tcl_UtfToUniCharDString(
    const char *src,		/* UTF-8 string to convert to Unicode. */
    int length,			/* Length of UTF-8 string in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Unicode representation of string is
				 * appended to this previously initialized







|







538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
 *
 * Side effects:
 *	None.
 *
 *---------------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
int *
Tcl_UtfToUniCharDString(
    const char *src,		/* UTF-8 string to convert to Unicode. */
    int length,			/* Length of UTF-8 string in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Unicode representation of string is
				 * appended to this previously initialized
589
590
591
592
593
594
595

596
597
598
599
600
601
602
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
}


unsigned short *
Tcl_UtfToUtf16DString(
    const char *src,		/* UTF-8 string to convert to Unicode. */
    int length,			/* Length of UTF-8 string in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Unicode representation of string is







>







591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
}
#endif /* TCL_UTF_MAX > 3 */

unsigned short *
Tcl_UtfToUtf16DString(
    const char *src,		/* UTF-8 string to convert to Unicode. */
    int length,			/* Length of UTF-8 string in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Unicode representation of string is
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651

1652
1653
1654
1655
1656
1657
1658
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#undef Tcl_UniCharLen
int
Tcl_UniCharLen(
    const int *uniStr)	/* Unicode string to find length of. */
{
    int len = 0;

    while (*uniStr != '\0') {
	len++;
	uniStr++;
    }
    return len;
}


int
Tcl_Utf16Len(
    const unsigned short *utf16Str)	/* Unicode string to find length of. */
{
    int len = 0;








|












>







1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
int
Tcl_UniCharLen(
    const int *uniStr)	/* Unicode string to find length of. */
{
    int len = 0;

    while (*uniStr != '\0') {
	len++;
	uniStr++;
    }
    return len;
}
#endif /* TCL_UTF_MAX > 3 */

int
Tcl_Utf16Len(
    const unsigned short *utf16Str)	/* Unicode string to find length of. */
{
    int len = 0;

1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#undef Tcl_UniCharNcmp
int
Tcl_UniCharNcmp(
    const int *ucs,	/* Unicode string to compare to uct. */
    const int *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN







|







1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
int
Tcl_UniCharNcmp(
    const int *ucs,	/* Unicode string to compare to uct. */
    const int *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
1703
1704
1705
1706
1707
1708
1709

1710
1711
1712
1713
1714
1715
1716
	if (*ucs != *uct) {
	    return (*ucs - *uct);
	}
    }
    return 0;
#endif /* WORDS_BIGENDIAN */
}


int
Tcl_Utf16Ncmp(
    const unsigned short *ucs,	/* Unicode string to compare to uct. */
    const unsigned short *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{







>







1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
	if (*ucs != *uct) {
	    return (*ucs - *uct);
	}
    }
    return 0;
#endif /* WORDS_BIGENDIAN */
}
#endif /* TCL_UTF_MAX > 3 */

int
Tcl_Utf16Ncmp(
    const unsigned short *ucs,	/* Unicode string to compare to uct. */
    const unsigned short *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774


1775
1776
1777
1778
1779
1780
1781
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#undef Tcl_UniCharNcasecmp
int
Tcl_UniCharNcasecmp(
    const int *ucs,	/* Unicode string to compare to uct. */
    const int *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{
    for ( ; numChars != 0; numChars--, ucs++, uct++) {
	if (*ucs != *uct) {
	    int lcs = Tcl_UniCharToLower(*ucs);
	    int lct = Tcl_UniCharToLower(*uct);

	    if (lcs != lct) {
		return (lcs - lct);
	    }
	}
    }
    return 0;
}


int
Tcl_Utf16Ncasecmp(
    const unsigned short *ucs,	/* Utf16 string to compare to uct. */
    const unsigned short *uct,	/* Utf16 string ucs is compared to. */
    unsigned long numChars)	/* Number of Utf16 characters to compare. */
{
    for ( ; numChars != 0; numChars--, ucs++, uct++) {







|


















>
>







1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
int
Tcl_UniCharNcasecmp(
    const int *ucs,	/* Unicode string to compare to uct. */
    const int *uct,	/* Unicode string ucs is compared to. */
    unsigned long numChars)	/* Number of unichars to compare. */
{
    for ( ; numChars != 0; numChars--, ucs++, uct++) {
	if (*ucs != *uct) {
	    int lcs = Tcl_UniCharToLower(*ucs);
	    int lct = Tcl_UniCharToLower(*uct);

	    if (lcs != lct) {
		return (lcs - lct);
	    }
	}
    }
    return 0;
}
#endif /* TCL_UTF_MAX > 3 */

int
Tcl_Utf16Ncasecmp(
    const unsigned short *ucs,	/* Utf16 string to compare to uct. */
    const unsigned short *uct,	/* Utf16 string ucs is compared to. */
    unsigned long numChars)	/* Number of Utf16 characters to compare. */
{
    for ( ; numChars != 0; numChars--, ucs++, uct++) {
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#undef Tcl_UniCharCaseMatch
int
Tcl_UniCharCaseMatch(
    const int *uniStr,	/* Unicode String. */
    const int *uniPattern,
				/* Pattern, which may contain special
				 * characters. */
    int nocase)			/* 0 for case sensitive, 1 for insensitive */







|







2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#if TCL_UTF_MAX > 3
int
Tcl_UniCharCaseMatch(
    const int *uniStr,	/* Unicode String. */
    const int *uniPattern,
				/* Pattern, which may contain special
				 * characters. */
    int nocase)			/* 0 for case sensitive, 1 for insensitive */
2286
2287
2288
2289
2290
2291
2292

2293
2294
2295
2296
2297
2298
2299
	} else if (*uniStr != *uniPattern) {
	    return 0;
	}
	uniStr++;
	uniPattern++;
    }
}


int
Tcl_Utf16CaseMatch(
    const unsigned short *uniStr,	/* Unicode String. */
    const unsigned short *uniPattern,
				/* Pattern, which may contain special
				 * characters. */







>







2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
	} else if (*uniStr != *uniPattern) {
	    return 0;
	}
	uniStr++;
	uniPattern++;
    }
}
#endif /* TCL_UTF_MAX > 3 */

int
Tcl_Utf16CaseMatch(
    const unsigned short *uniStr,	/* Unicode String. */
    const unsigned short *uniPattern,
				/* Pattern, which may contain special
				 * characters. */