Tcl Source Code

Changes On Branch tip-617
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch tip-617 Excluding Merge-Ins

This is equivalent to a diff from e81990041e to 68ce4c0fb3

2022-02-24
17:13
TIP #617: Tcl_WCharLen/Tcl_Char16Len check-in: 85afe2127e user: jan.nijtmans tags: core-8-branch
2022-02-12
11:49
Merge 8.6 check-in: 842fcfbdfc user: jan.nijtmans tags: core-8-branch
2022-02-11
15:34
Merge 8.7 check-in: b492dbc27a user: jan.nijtmans tags: encodings-with-flags
15:31
Merge 8.7 check-in: ced9151f74 user: jan.nijtmans tags: tip-616-for-8.7
2022-02-10
16:37
Merge 8.7 Closed-Leaf check-in: 68ce4c0fb3 user: jan.nijtmans tags: tip-617
15:23
Merge 8.7 check-in: 32aabe61f6 user: jan.nijtmans tags: trunk, main
15:22
Merge 8.6 check-in: e81990041e user: jan.nijtmans tags: core-8-branch
15:21
Add zlib1.dll for windows-arm64 check-in: 63c17e0fa8 user: jan.nijtmans tags: core-8-6-branch
14:16
Use Tcl_NewWideIntObj() for values that might be bigger than 32-bit check-in: fda1913575 user: jan.nijtmans tags: core-8-branch
2022-02-09
22:14
Merge 8.7 check-in: d55c77d491 user: jan.nijtmans tags: tip-617

Changes to doc/Utf.3.

1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18










-
+







'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToChar16, Tcl_UtfToWChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Char16ToUtfDString, Tcl_UtfToWCharDString, Tcl_UtfToChar16DString, Tcl_WCharLen, Tcl_Char16Len, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
typedef ... \fBTcl_UniChar\fR;
.sp
int
41
42
43
44
45
46
47






48
49
50
51
52
53
54
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60







+
+
+
+
+
+







.sp
unsigned short *
\fBTcl_UtfToChar16DString\fR(\fIsrc, length, dsPtr\fR)
.sp
wchar_t *
\fBTcl_UtfToWCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
int
\fBTcl_Char16Len\fR(\fIuniStr\fR)
.sp
int
\fBTcl_WCharLen\fR(\fIuniStr\fR)
.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
194
195
196
197
198
199
200








201
202
203
204
205
206
207
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221







+
+
+
+
+
+
+
+







.QW \-1 ,
in which case \fBTcl_UtfToUniCharDString\fR uses \fBstrlen\fR to
calculate the length.  The return value is a pointer to the Unicode
representation of the UTF-8 string.  Storage for the return value
is appended to the end of the \fBTcl_DString\fR.  The Unicode string
is terminated with a Unicode null character.
.PP
\fBTcl_Char16Len\fR corresponds to \fBstrlen\fR for UTF-16
characters.  It accepts a null-terminated Unicode string and returns
the number of Unicode characters (not bytes) in that string.
.PP
\fBTcl_WCharLen\fR corresponds to \fBstrlen\fR for wchar_t
characters.  It accepts a null-terminated Unicode string and returns
the number of Unicode characters (not bytes) in that string.
.PP
\fBTcl_UniCharLen\fR corresponds to \fBstrlen\fR for Unicode
characters.  It accepts a null-terminated Unicode string and returns
the number of Unicode characters (not bytes) in that string.
.PP
\fBTcl_UniCharNcmp\fR and \fBTcl_UniCharNcasecmp\fR correspond to
\fBstrncmp\fR and \fBstrncasecmp\fR, respectively, for Unicode characters.
They accept two null-terminated Unicode strings and the number of characters

Changes to generic/tcl.decls.

1240
1241
1242
1243
1244
1245
1246
1247
1248


1249
1250
1251
1252
1253
1254
1255
1240
1241
1242
1243
1244
1245
1246


1247
1248
1249
1250
1251
1252
1253
1254
1255







-
-
+
+







}
declare 350 {
    int Tcl_UniCharIsUpper(int ch)
}
declare 351 {
    int Tcl_UniCharIsWordChar(int ch)
}
declare 352 {deprecated {Use Tcl_GetCharLength}} {
    int Tcl_UniCharLen(const Tcl_UniChar *uniStr)
declare 352 {
    int Tcl_Char16Len(const unsigned short *uniStr)
}
declare 353 {deprecated {Use Tcl_UtfNcmp}} {
    int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct,
	    unsigned long numChars)
}
declare 354 {
    char *Tcl_Char16ToUtfDString(const unsigned short *uniStr,
2437
2438
2439
2440
2441
2442
2443






2444
2445
2446
2447
2448
2449
2450
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456







+
+
+
+
+
+







    int Tcl_UniCharIsUnicode(int ch)
}

# TIP #511
declare 660 {
    int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, int sigNumber)
}

# TIP #617
declare 668 {
    int Tcl_UniCharLen(const int *uniStr)
}


# ----- BASELINE -- FOR -- 8.7.0 ----- #

##############################################################################

# Define the platform specific public Tcl interface. These functions are only
# available on the designated platform.

Changes to generic/tclDecls.h.

1059
1060
1061
1062
1063
1064
1065
1066
1067

1068
1069
1070
1071
1072
1073
1074
1059
1060
1061
1062
1063
1064
1065


1066
1067
1068
1069
1070
1071
1072
1073







-
-
+







/* 349 */
EXTERN int		Tcl_UniCharIsSpace(int ch);
/* 350 */
EXTERN int		Tcl_UniCharIsUpper(int ch);
/* 351 */
EXTERN int		Tcl_UniCharIsWordChar(int ch);
/* 352 */
TCL_DEPRECATED("Use Tcl_GetCharLength")
int			Tcl_UniCharLen(const Tcl_UniChar *uniStr);
EXTERN int		Tcl_Char16Len(const unsigned short *uniStr);
/* 353 */
TCL_DEPRECATED("Use Tcl_UtfNcmp")
int			Tcl_UniCharNcmp(const Tcl_UniChar *ucs,
				const Tcl_UniChar *uct,
				unsigned long numChars);
/* 354 */
EXTERN char *		Tcl_Char16ToUtfDString(const unsigned short *uniStr,
1944
1945
1946
1947
1948
1949
1950









1951
1952
1953
1954
1955
1956
1957
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965







+
+
+
+
+
+
+
+
+







/* 657 */
EXTERN int		Tcl_UniCharIsUnicode(int ch);
/* Slot 658 is reserved */
/* Slot 659 is reserved */
/* 660 */
EXTERN int		Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
				int sigNumber);
/* Slot 661 is reserved */
/* Slot 662 is reserved */
/* Slot 663 is reserved */
/* Slot 664 is reserved */
/* Slot 665 is reserved */
/* Slot 666 is reserved */
/* Slot 667 is reserved */
/* 668 */
EXTERN int		Tcl_UniCharLen(const int *uniStr);

typedef struct {
    const struct TclPlatStubs *tclPlatStubs;
    const struct TclIntStubs *tclIntStubs;
    const struct TclIntPlatStubs *tclIntPlatStubs;
} TclStubHooks;

2331
2332
2333
2334
2335
2336
2337
2338

2339
2340
2341
2342
2343
2344
2345
2339
2340
2341
2342
2343
2344
2345

2346
2347
2348
2349
2350
2351
2352
2353







-
+







    int (*tcl_UniCharIsAlnum) (int ch); /* 345 */
    int (*tcl_UniCharIsAlpha) (int ch); /* 346 */
    int (*tcl_UniCharIsDigit) (int ch); /* 347 */
    int (*tcl_UniCharIsLower) (int ch); /* 348 */
    int (*tcl_UniCharIsSpace) (int ch); /* 349 */
    int (*tcl_UniCharIsUpper) (int ch); /* 350 */
    int (*tcl_UniCharIsWordChar) (int ch); /* 351 */
    TCL_DEPRECATED_API("Use Tcl_GetCharLength") int (*tcl_UniCharLen) (const Tcl_UniChar *uniStr); /* 352 */
    int (*tcl_Char16Len) (const unsigned short *uniStr); /* 352 */
    TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */
    char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */
    unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */
    Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */
    TCL_DEPRECATED_API("Use Tcl_EvalTokensStandard") Tcl_Obj * (*tcl_EvalTokens) (Tcl_Interp *interp, Tcl_Token *tokenPtr, int count); /* 357 */
    void (*tcl_FreeParse) (Tcl_Parse *parsePtr); /* 358 */
    void (*tcl_LogCommandInfo) (Tcl_Interp *interp, const char *script, const char *command, int length); /* 359 */
2640
2641
2642
2643
2644
2645
2646








2647
2648
2649
2650
2651
2652
2653
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669







+
+
+
+
+
+
+
+







    int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */
    const char * (*tcl_UtfNext) (const char *src); /* 655 */
    const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
    int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
    void (*reserved658)(void);
    void (*reserved659)(void);
    int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */
    void (*reserved661)(void);
    void (*reserved662)(void);
    void (*reserved663)(void);
    void (*reserved664)(void);
    void (*reserved665)(void);
    void (*reserved666)(void);
    void (*reserved667)(void);
    int (*tcl_UniCharLen) (const int *uniStr); /* 668 */
} TclStubs;

extern const TclStubs *tclStubsPtr;

#ifdef __cplusplus
}
#endif
3374
3375
3376
3377
3378
3379
3380
3381
3382


3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396


3397
3398
3399
3400
3401
3402
3403
3404
3405







-
-
+
+







	(tclStubsPtr->tcl_UniCharIsLower) /* 348 */
#define Tcl_UniCharIsSpace \
	(tclStubsPtr->tcl_UniCharIsSpace) /* 349 */
#define Tcl_UniCharIsUpper \
	(tclStubsPtr->tcl_UniCharIsUpper) /* 350 */
#define Tcl_UniCharIsWordChar \
	(tclStubsPtr->tcl_UniCharIsWordChar) /* 351 */
#define Tcl_UniCharLen \
	(tclStubsPtr->tcl_UniCharLen) /* 352 */
#define Tcl_Char16Len \
	(tclStubsPtr->tcl_Char16Len) /* 352 */
#define Tcl_UniCharNcmp \
	(tclStubsPtr->tcl_UniCharNcmp) /* 353 */
#define Tcl_Char16ToUtfDString \
	(tclStubsPtr->tcl_Char16ToUtfDString) /* 354 */
#define Tcl_UtfToChar16DString \
	(tclStubsPtr->tcl_UtfToChar16DString) /* 355 */
#define Tcl_GetRegExpFromObj \
3990
3991
3992
3993
3994
3995
3996









3997
3998
3999
4000
4001
4002
4003
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028







+
+
+
+
+
+
+
+
+







	(tclStubsPtr->tcl_UtfPrev) /* 656 */
#define Tcl_UniCharIsUnicode \
	(tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
/* Slot 658 is reserved */
/* Slot 659 is reserved */
#define Tcl_AsyncMarkFromSignal \
	(tclStubsPtr->tcl_AsyncMarkFromSignal) /* 660 */
/* Slot 661 is reserved */
/* Slot 662 is reserved */
/* Slot 663 is reserved */
/* Slot 664 is reserved */
/* Slot 665 is reserved */
/* Slot 666 is reserved */
/* Slot 667 is reserved */
#define Tcl_UniCharLen \
	(tclStubsPtr->tcl_UniCharLen) /* 668 */

#endif /* defined(USE_TCL_STUBS) */

/* !END!: Do not edit above this line. */

#undef TclUnusedStubEntry
#if defined(USE_TCL_STUBS)
4256
4257
4258
4259
4260
4261
4262


4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273



4274
4275
4276
4277
4278
4279
4280
4281
4282
4283



4284
4285
4286
4287
4288
4289
4290
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323







+
+











+
+
+










+
+
+







#if TCL_UTF_MAX <= 3
#   undef Tcl_UniCharToUtfDString
#   define Tcl_UniCharToUtfDString Tcl_Char16ToUtfDString
#   undef Tcl_UtfToUniCharDString
#   define Tcl_UtfToUniCharDString Tcl_UtfToChar16DString
#   undef Tcl_UtfToUniChar
#   define Tcl_UtfToUniChar Tcl_UtfToChar16
#   undef Tcl_UniCharLen
#   define Tcl_UniCharLen Tcl_Char16Len
#endif
#if defined(USE_TCL_STUBS)
#   define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \
		? (char *(*)(const wchar_t *, int, Tcl_DString *))tclStubsPtr->tcl_UniCharToUtfDString \
		: (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString)
#   define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \
		? (wchar_t *(*)(const char *, int, Tcl_DString *))tclStubsPtr->tcl_UtfToUniCharDString \
		: (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString)
#   define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \
		? (int (*)(const char *, wchar_t *))tclStubsPtr->tcl_UtfToUniChar \
		: (int (*)(const char *, wchar_t *))Tcl_UtfToChar16)
#   define Tcl_WCharLen (sizeof(wchar_t) != sizeof(short) \
		? (int (*)(wchar_t *))tclStubsPtr->tcl_UniCharLen \
		: (int (*)(wchar_t *))Tcl_Char16Len)
#else
#   define Tcl_WCharToUtfDString (sizeof(wchar_t) != sizeof(short) \
		? (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_UniCharToUtfDString \
		: (char *(*)(const wchar_t *, int, Tcl_DString *))Tcl_Char16ToUtfDString)
#   define Tcl_UtfToWCharDString (sizeof(wchar_t) != sizeof(short) \
		? (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToUniCharDString \
		: (wchar_t *(*)(const char *, int, Tcl_DString *))Tcl_UtfToChar16DString)
#   define Tcl_UtfToWChar (sizeof(wchar_t) != sizeof(short) \
		? (int (*)(const char *, wchar_t *))Tcl_UtfToUniChar \
		: (int (*)(const char *, wchar_t *))Tcl_UtfToChar16)
#   define Tcl_WCharLen (sizeof(wchar_t) != sizeof(short) \
		? (int (*)(wchar_t *))Tcl_UniCharLen \
		: (int (*)(wchar_t *))Tcl_Char16Len)
#endif

/*
 * Deprecated Tcl procedures:
 */

#undef Tcl_EvalObj

Changes to generic/tclStubInit.c.

1631
1632
1633
1634
1635
1636
1637
1638

1639
1640
1641
1642
1643
1644
1645
1631
1632
1633
1634
1635
1636
1637

1638
1639
1640
1641
1642
1643
1644
1645







-
+







    Tcl_UniCharIsAlnum, /* 345 */
    Tcl_UniCharIsAlpha, /* 346 */
    Tcl_UniCharIsDigit, /* 347 */
    Tcl_UniCharIsLower, /* 348 */
    Tcl_UniCharIsSpace, /* 349 */
    Tcl_UniCharIsUpper, /* 350 */
    Tcl_UniCharIsWordChar, /* 351 */
    Tcl_UniCharLen, /* 352 */
    Tcl_Char16Len, /* 352 */
    Tcl_UniCharNcmp, /* 353 */
    Tcl_Char16ToUtfDString, /* 354 */
    Tcl_UtfToChar16DString, /* 355 */
    Tcl_GetRegExpFromObj, /* 356 */
    Tcl_EvalTokens, /* 357 */
    Tcl_FreeParse, /* 358 */
    Tcl_LogCommandInfo, /* 359 */
1940
1941
1942
1943
1944
1945
1946








1947
1948
1949
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957







+
+
+
+
+
+
+
+



    Tcl_UtfCharComplete, /* 654 */
    Tcl_UtfNext, /* 655 */
    Tcl_UtfPrev, /* 656 */
    Tcl_UniCharIsUnicode, /* 657 */
    0, /* 658 */
    0, /* 659 */
    Tcl_AsyncMarkFromSignal, /* 660 */
    0, /* 661 */
    0, /* 662 */
    0, /* 663 */
    0, /* 664 */
    0, /* 665 */
    0, /* 666 */
    0, /* 667 */
    Tcl_UniCharLen, /* 668 */
};

/* !END!: Do not edit above this line. */

Changes to generic/tclUtf.c.

1769
1770
1771
1772
1773
1774
1775






























1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789

1790
1791
1792

1793
1794
1795
1796
1797
1798
1799
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822

1823
1824
1825
1826
1827
1828
1829
1830







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














+


-
+







    /* Clear away extension bits, if any */
    return ch & 0x1FFFFF;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_Char16Len --
 *
 *	Find the length of a UniChar string. The str input must be null
 *	terminated.
 *
 * Results:
 *	Returns the length of str in UniChars (not bytes).
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_Char16Len(
    const unsigned short *uniStr)	/* Unicode string to find length of. */
{
    int len = 0;

    while (*uniStr != '\0') {
	len++;
	uniStr++;
    }
    return len;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharLen --
 *
 *	Find the length of a UniChar string. The str input must be null
 *	terminated.
 *
 * Results:
 *	Returns the length of str in UniChars (not bytes).
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

#undef Tcl_UniCharLen
int
Tcl_UniCharLen(
    const Tcl_UniChar *uniStr)	/* Unicode string to find length of. */
    const int *uniStr)	/* Unicode string to find length of. */
{
    int len = 0;

    while (*uniStr != '\0') {
	len++;
	uniStr++;
    }