Tcl Source Code

Changes On Branch tip-685
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch tip-685 Excluding Merge-Ins

This is equivalent to a diff from cb3bf7771b to 3457ff04da

2024-01-11
08:07
Few minor implementation fixes (e.g. "transferable" < "true") Closed-Leaf check-in: 3457ff04da user: jan.nijtmans tags: tip-685
2024-01-10
21:01
Fix [4e38c347a4] Changed contract for Tcl_UtfN(case)cmp in Tcl 8.7 check-in: 45db2932ba user: jan.nijtmans tags: core-8-branch
12:41
TIP 685 implementation: rename "string is unicode" to "string is transferable". Also rename underlyi... check-in: 5018317bb2 user: oehhar tags: tip-685
2024-01-09
22:29
Merge 8.7 check-in: 2d14dee73e user: jan.nijtmans tags: trunk, main
12:15
Merge 8.6 check-in: cb3bf7771b user: jan.nijtmans tags: core-8-branch
12:02
Optimize use of $fullutf variable check-in: ceaac40e80 user: jan.nijtmans tags: core-8-6-branch
2024-01-08
13:23
Merge 8.6. Add (back) special Tcl_GetLongFromObj handling for Cygwin64 stub-table check-in: c8a34a084b user: jan.nijtmans tags: core-8-branch

Changes to doc/UniCharIsAlpha.3.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Tcl_UniCharIsAlpha 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsUnicode, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
int
\fBTcl_UniCharIsAlnum\fR(\fIch\fR)
.sp










|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
'\"
'\" Copyright (c) 1997 Sun Microsystems, Inc.
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH Tcl_UniCharIsAlpha 3 "8.1" Tcl "Tcl Library Procedures"
.so man.macros
.BS
.SH NAME
Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsTransferable, Tcl_UniCharIsUpper, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
.sp
int
\fBTcl_UniCharIsAlnum\fR(\fIch\fR)
.sp
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
int
\fBTcl_UniCharIsPunct\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsSpace\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsUpper\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsUnicode\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsWordChar\fR(\fIch\fR)
.SH ARGUMENTS
.AS int ch
.AP int ch in
The Unicode character to be examined.







|


|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
int
\fBTcl_UniCharIsPunct\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsSpace\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsTransferable\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsUpper\fR(\fIch\fR)
.sp
int
\fBTcl_UniCharIsWordChar\fR(\fIch\fR)
.SH ARGUMENTS
.AS int ch
.AP int ch in
The Unicode character to be examined.
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
.PP
\fBTcl_UniCharIsPrint\fR tests if the character is a Unicode print character.
.PP
\fBTcl_UniCharIsPunct\fR tests if the character is a Unicode punctuation character.
.PP
\fBTcl_UniCharIsSpace\fR tests if the character is a whitespace Unicode character.
.PP
\fBTcl_UniCharIsUpper\fR tests if the character is an uppercase Unicode character.

.PP
\fBTcl_UniCharIsUnicode\fR tests if the character is a Unicode character, not being
a surrogate or noncharacter.
.PP
\fBTcl_UniCharIsWordChar\fR tests if the character is alphanumeric or
a connector punctuation mark.

.SH KEYWORDS
unicode, classification







|
>

|
<






78
79
80
81
82
83
84
85
86
87
88

89
90
91
92
93
94
.PP
\fBTcl_UniCharIsPrint\fR tests if the character is a Unicode print character.
.PP
\fBTcl_UniCharIsPunct\fR tests if the character is a Unicode punctuation character.
.PP
\fBTcl_UniCharIsSpace\fR tests if the character is a whitespace Unicode character.
.PP
\fBTcl_UniCharIsTransferable\fR tests if the character is a Unicode character, not being
a surrogate or noncharacter.
.PP
\fBTcl_UniCharIsUpper\fR tests if the character is an uppercase Unicode character.

.PP
\fBTcl_UniCharIsWordChar\fR tests if the character is alphanumeric or
a connector punctuation mark.

.SH KEYWORDS
unicode, classification

Changes to doc/string.n.

170
171
172
173
174
175
176
177
178

179
180
181
182
183
184
185
.IP \fBspace\fR 12
Any Unicode whitespace character, mongolian vowel separator
(U+180e), zero width space (U+200b), word joiner (U+2060) or
zero width no-break space (U+feff) (=BOM).
.IP \fBtrue\fR 12
Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is
true.
.IP \fBunicode\fR 12
Any Unicode character, except surrogates and noncharacters

.IP \fBupper\fR 12
Any upper case alphabet character in the Unicode character set.
.IP \fBwideinteger\fR 12
Any of the valid forms for a wide integer in Tcl, with optional
surrounding whitespace.  In case of overflow in the value, 0 is
returned and the \fIvarname\fR will contain \-1.
.IP \fBwordchar\fR 12







|
|
>







170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
.IP \fBspace\fR 12
Any Unicode whitespace character, mongolian vowel separator
(U+180e), zero width space (U+200b), word joiner (U+2060) or
zero width no-break space (U+feff) (=BOM).
.IP \fBtrue\fR 12
Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is
true.
.IP \fBtransferable\fR 12
Any Unicode character, except surrogates and noncharacters.
The purpose is to check whether a string may be transferred to an external program using a Unicode encoding.
.IP \fBupper\fR 12
Any upper case alphabet character in the Unicode character set.
.IP \fBwideinteger\fR 12
Any of the valid forms for a wide integer in Tcl, with optional
surrounding whitespace.  In case of overflow in the value, 0 is
returned and the \fIvarname\fR will contain \-1.
.IP \fBwordchar\fR 12

Changes to generic/tcl.decls.

2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
declare 655 {
    const char *Tcl_UtfNext(const char *src)
}
declare 656 {
    const char *Tcl_UtfPrev(const char *src, const char *start)
}
declare 657 {
    int Tcl_UniCharIsUnicode(int ch)
}

# TIP 656
declare 658 {
    int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding,
        const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr,
        Tcl_Size *errorLocationPtr)







|







2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
declare 655 {
    const char *Tcl_UtfNext(const char *src)
}
declare 656 {
    const char *Tcl_UtfPrev(const char *src, const char *start)
}
declare 657 {
    int Tcl_UniCharIsTransferable(int ch)
}

# TIP 656
declare 658 {
    int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding,
        const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr,
        Tcl_Size *errorLocationPtr)

Changes to generic/tclCmdMZ.c.

1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
    Tcl_WideInt w;

    static const char *const isClasses[] = {
	"alnum",	"alpha",	"ascii",	"control",
	"boolean",	"dict",		"digit",	"double",
	"entier",	"false",	"graph",	"integer",
	"list",		"lower",	"print",	"punct",
	"space",	"true",		"upper",	"unicode",
	"wideinteger", "wordchar",	"xdigit",	NULL
    };
    enum isClassesEnum {
	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
    };
    static const char *const isOptions[] = {
	"-strict", "-failindex", NULL
    };
    enum isOptionsEnum {
	OPT_STRICT, OPT_FAILIDX







|







|







1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
    Tcl_WideInt w;

    static const char *const isClasses[] = {
	"alnum",	"alpha",	"ascii",	"control",
	"boolean",	"dict",		"digit",	"double",
	"entier",	"false",	"graph",	"integer",
	"list",		"lower",	"print",	"punct",
	"space",	"transferable",	"true",		"upper",
	"wideinteger", "wordchar",	"xdigit",	NULL
    };
    enum isClassesEnum {
	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
	STR_IS_SPACE,	STR_IS_TRANSFERABLE,	STR_IS_TRUE,	STR_IS_UPPER,
	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
    };
    static const char *const isOptions[] = {
	"-strict", "-failindex", NULL
    };
    enum isOptionsEnum {
	OPT_STRICT, OPT_FAILIDX
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
	break;
    case STR_IS_SPACE:
	chcomp = Tcl_UniCharIsSpace;
	break;
    case STR_IS_UPPER:
	chcomp = Tcl_UniCharIsUpper;
	break;
    case STR_IS_UNICODE:
	chcomp = Tcl_UniCharIsUnicode;
	break;
    case STR_IS_WORD:
	chcomp = Tcl_UniCharIsWordChar;
	break;
    case STR_IS_XDIGIT:
	chcomp = UniCharIsHexDigit;
	break;







|
|







1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
	break;
    case STR_IS_SPACE:
	chcomp = Tcl_UniCharIsSpace;
	break;
    case STR_IS_UPPER:
	chcomp = Tcl_UniCharIsUpper;
	break;
    case STR_IS_TRANSFERABLE:
	chcomp = Tcl_UniCharIsTransferable;
	break;
    case STR_IS_WORD:
	chcomp = Tcl_UniCharIsWordChar;
	break;
    case STR_IS_XDIGIT:
	chcomp = UniCharIsHexDigit;
	break;

Changes to generic/tclCompCmdsSZ.c.

504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
    DefineLineInformation;	/* TIP #280 */
    Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr);
    static const char *const isClasses[] = {
	"alnum",	"alpha",	"ascii",	"control",
	"boolean",	"dict",		"digit",	"double",
	"entier",	"false",	"graph",	"integer",
	"list",		"lower",	"print",	"punct",
	"space",	"true",		"upper",	"unicode",
	"wideinteger", "wordchar",	"xdigit",	NULL
    };
    enum isClassesEnum {
	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
    };
    int t, range, allowEmpty = 0, end;
    InstStringClassType strClassType;
    Tcl_Obj *isClass;

    if (parsePtr->numWords < 3 || parsePtr->numWords > 6) {







|







|







504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
    DefineLineInformation;	/* TIP #280 */
    Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr);
    static const char *const isClasses[] = {
	"alnum",	"alpha",	"ascii",	"control",
	"boolean",	"dict",		"digit",	"double",
	"entier",	"false",	"graph",	"integer",
	"list",		"lower",	"print",	"punct",
	"space",	"transferable",	"true",		"upper",
	"wideinteger", "wordchar",	"xdigit",	NULL
    };
    enum isClassesEnum {
	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
	STR_IS_SPACE,	STR_IS_TRANSFERABLE,	STR_IS_TRUE,	STR_IS_UPPER,
	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
    };
    int t, range, allowEmpty = 0, end;
    InstStringClassType strClassType;
    Tcl_Obj *isClass;

    if (parsePtr->numWords < 3 || parsePtr->numWords > 6) {
602
603
604
605
606
607
608



609
610
611
612
613
614
615
616
617
618
619
620
621
	goto compileStrClass;
    case STR_IS_PUNCT:
	strClassType = STR_CLASS_PUNCT;
	goto compileStrClass;
    case STR_IS_SPACE:
	strClassType = STR_CLASS_SPACE;
	goto compileStrClass;



    case STR_IS_UPPER:
	strClassType = STR_CLASS_UPPER;
	goto compileStrClass;
    case STR_IS_UNICODE:
	strClassType = STR_CLASS_UNICODE;
	goto compileStrClass;
    case STR_IS_WORD:
	strClassType = STR_CLASS_WORD;
	goto compileStrClass;
    case STR_IS_XDIGIT:
	strClassType = STR_CLASS_XDIGIT;
    compileStrClass:
	if (allowEmpty) {







>
>
>



<
<
<







602
603
604
605
606
607
608
609
610
611
612
613
614



615
616
617
618
619
620
621
	goto compileStrClass;
    case STR_IS_PUNCT:
	strClassType = STR_CLASS_PUNCT;
	goto compileStrClass;
    case STR_IS_SPACE:
	strClassType = STR_CLASS_SPACE;
	goto compileStrClass;
    case STR_IS_TRANSFERABLE:
	strClassType = STR_CLASS_TRANSFERABLE;
	goto compileStrClass;
    case STR_IS_UPPER:
	strClassType = STR_CLASS_UPPER;
	goto compileStrClass;



    case STR_IS_WORD:
	strClassType = STR_CLASS_WORD;
	goto compileStrClass;
    case STR_IS_XDIGIT:
	strClassType = STR_CLASS_XDIGIT;
    compileStrClass:
	if (allowEmpty) {
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
    {"lower",	Tcl_UniCharIsLower},
    {"print",	Tcl_UniCharIsPrint},
    {"punct",	Tcl_UniCharIsPunct},
    {"space",	Tcl_UniCharIsSpace},
    {"upper",	Tcl_UniCharIsUpper},
    {"word",	Tcl_UniCharIsWordChar},
    {"xdigit",	UniCharIsHexDigit},
    {"unicode",	Tcl_UniCharIsUnicode},
    {"",	NULL}
};

/*
 *----------------------------------------------------------------------
 *
 * TclCompileSubstCmd --







|







1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
    {"lower",	Tcl_UniCharIsLower},
    {"print",	Tcl_UniCharIsPrint},
    {"punct",	Tcl_UniCharIsPunct},
    {"space",	Tcl_UniCharIsSpace},
    {"upper",	Tcl_UniCharIsUpper},
    {"word",	Tcl_UniCharIsWordChar},
    {"xdigit",	UniCharIsHexDigit},
    {"transferable",	Tcl_UniCharIsTransferable},
    {"",	NULL}
};

/*
 *----------------------------------------------------------------------
 *
 * TclCompileSubstCmd --

Changes to generic/tclCompile.h.

922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
    STR_CLASS_PUNCT,		/* Unicode punctuation characters. */
    STR_CLASS_SPACE,		/* Unicode space characters. */
    STR_CLASS_UPPER,		/* Unicode upper-case alphabet characters. */
    STR_CLASS_WORD,		/* Unicode word (alphabetic, digit, connector
				 * punctuation) characters. */
    STR_CLASS_XDIGIT,		/* Characters that can be used as digits in
				 * hexadecimal numbers ([0-9A-Fa-f]). */
    STR_CLASS_UNICODE		/* Unicode characters. */
} InstStringClassType;

typedef struct StringClassDesc {
    char name[8];		/* Name of the class. */
    int (*comparator)(int);	/* Function to test if a single unicode
				 * character is a member of the class. */
} StringClassDesc;

MODULE_SCOPE StringClassDesc const tclStringClassTable[];

/*







|



|







922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
    STR_CLASS_PUNCT,		/* Unicode punctuation characters. */
    STR_CLASS_SPACE,		/* Unicode space characters. */
    STR_CLASS_UPPER,		/* Unicode upper-case alphabet characters. */
    STR_CLASS_WORD,		/* Unicode word (alphabetic, digit, connector
				 * punctuation) characters. */
    STR_CLASS_XDIGIT,		/* Characters that can be used as digits in
				 * hexadecimal numbers ([0-9A-Fa-f]). */
    STR_CLASS_TRANSFERABLE	/* Transferable characters. */
} InstStringClassType;

typedef struct StringClassDesc {
    char name[16];		/* Name of the class. */
    int (*comparator)(int);	/* Function to test if a single unicode
				 * character is a member of the class. */
} StringClassDesc;

MODULE_SCOPE StringClassDesc const tclStringClassTable[];

/*

Changes to generic/tclDecls.h.

1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
/* 654 */
EXTERN int		Tcl_UtfCharComplete(const char *src, Tcl_Size length);
/* 655 */
EXTERN const char *	Tcl_UtfNext(const char *src);
/* 656 */
EXTERN const char *	Tcl_UtfPrev(const char *src, const char *start);
/* 657 */
EXTERN int		Tcl_UniCharIsUnicode(int ch);
/* 658 */
EXTERN int		Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp,
				Tcl_Encoding encoding, const char *src,
				Tcl_Size srcLen, int flags,
				Tcl_DString *dsPtr,
				Tcl_Size *errorLocationPtr);
/* 659 */







|







1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
/* 654 */
EXTERN int		Tcl_UtfCharComplete(const char *src, Tcl_Size length);
/* 655 */
EXTERN const char *	Tcl_UtfNext(const char *src);
/* 656 */
EXTERN const char *	Tcl_UtfPrev(const char *src, const char *start);
/* 657 */
EXTERN int		Tcl_UniCharIsTransferable(int ch);
/* 658 */
EXTERN int		Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp,
				Tcl_Encoding encoding, const char *src,
				Tcl_Size srcLen, int flags,
				Tcl_DString *dsPtr,
				Tcl_Size *errorLocationPtr);
/* 659 */
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
    void (*reserved650)(void);
    void (*reserved651)(void);
    void (*reserved652)(void);
    void (*reserved653)(void);
    int (*tcl_UtfCharComplete) (const char *src, Tcl_Size length); /* 654 */
    const char * (*tcl_UtfNext) (const char *src); /* 655 */
    const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
    int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
    int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */
    int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */
    int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */
    void (*reserved661)(void);
    void (*reserved662)(void);
    void (*reserved663)(void);
    void (*reserved664)(void);







|







2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
    void (*reserved650)(void);
    void (*reserved651)(void);
    void (*reserved652)(void);
    void (*reserved653)(void);
    int (*tcl_UtfCharComplete) (const char *src, Tcl_Size length); /* 654 */
    const char * (*tcl_UtfNext) (const char *src); /* 655 */
    const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
    int (*tcl_UniCharIsTransferable) (int ch); /* 657 */
    int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */
    int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */
    int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */
    void (*reserved661)(void);
    void (*reserved662)(void);
    void (*reserved663)(void);
    void (*reserved664)(void);
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
/* Slot 653 is reserved */
#define Tcl_UtfCharComplete \
	(tclStubsPtr->tcl_UtfCharComplete) /* 654 */
#define Tcl_UtfNext \
	(tclStubsPtr->tcl_UtfNext) /* 655 */
#define Tcl_UtfPrev \
	(tclStubsPtr->tcl_UtfPrev) /* 656 */
#define Tcl_UniCharIsUnicode \
	(tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
#define Tcl_ExternalToUtfDStringEx \
	(tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 658 */
#define Tcl_UtfToExternalDStringEx \
	(tclStubsPtr->tcl_UtfToExternalDStringEx) /* 659 */
#define Tcl_AsyncMarkFromSignal \
	(tclStubsPtr->tcl_AsyncMarkFromSignal) /* 660 */
/* Slot 661 is reserved */







|
|







4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
/* Slot 653 is reserved */
#define Tcl_UtfCharComplete \
	(tclStubsPtr->tcl_UtfCharComplete) /* 654 */
#define Tcl_UtfNext \
	(tclStubsPtr->tcl_UtfNext) /* 655 */
#define Tcl_UtfPrev \
	(tclStubsPtr->tcl_UtfPrev) /* 656 */
#define Tcl_UniCharIsTransferable \
	(tclStubsPtr->tcl_UniCharIsTransferable) /* 657 */
#define Tcl_ExternalToUtfDStringEx \
	(tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 658 */
#define Tcl_UtfToExternalDStringEx \
	(tclStubsPtr->tcl_UtfToExternalDStringEx) /* 659 */
#define Tcl_AsyncMarkFromSignal \
	(tclStubsPtr->tcl_AsyncMarkFromSignal) /* 660 */
/* Slot 661 is reserved */

Changes to generic/tclStubInit.c.

1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
    0, /* 650 */
    0, /* 651 */
    0, /* 652 */
    0, /* 653 */
    Tcl_UtfCharComplete, /* 654 */
    Tcl_UtfNext, /* 655 */
    Tcl_UtfPrev, /* 656 */
    Tcl_UniCharIsUnicode, /* 657 */
    Tcl_ExternalToUtfDStringEx, /* 658 */
    Tcl_UtfToExternalDStringEx, /* 659 */
    Tcl_AsyncMarkFromSignal, /* 660 */
    0, /* 661 */
    0, /* 662 */
    0, /* 663 */
    0, /* 664 */







|







1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
    0, /* 650 */
    0, /* 651 */
    0, /* 652 */
    0, /* 653 */
    Tcl_UtfCharComplete, /* 654 */
    Tcl_UtfNext, /* 655 */
    Tcl_UtfPrev, /* 656 */
    Tcl_UniCharIsTransferable, /* 657 */
    Tcl_ExternalToUtfDStringEx, /* 658 */
    Tcl_UtfToExternalDStringEx, /* 659 */
    Tcl_AsyncMarkFromSignal, /* 660 */
    0, /* 661 */
    0, /* 662 */
    0, /* 663 */
    0, /* 664 */

Changes to generic/tclUtf.c.

2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
    }
    return (GetCategory(ch) == UPPERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsUnicode --
 *
 *	Decide whether a code point is a regular Unicode character, i.e.
 *	neither out of range, nor a surrogate, nor a noncharacter.
 *
 * Results:
 *	Returns 1 when the code point is accepted, 0 when it is rejected.
 *
 *	Rejected code points are:
 *	  1) Anything above U+10FFFF (negative inputs land here too,
 *	     because the value is examined as an unsigned quantity)
 *	  2) The surrogate block U+D800 - U+DFFF
 *	  3) The final two code points of every plane (U+??FFFE, U+??FFFF)
 *	  4) The 32 code points U+FDD0 - U+FDEF
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsUnicode(
    int ch)			/* Unicode character to test. */
{
    const unsigned int cp = (unsigned int)ch;

    /*
     * Each predicate is evaluated in unsigned arithmetic, so none of them
     * can overflow regardless of the input value.
     */

    const int beyondRange = (cp > 0x10FFFF);
    const int isSurrogate = ((cp & 0xFFF800) == 0xD800);
    const int isPlaneTail = ((cp & 0xFFFE) == 0xFFFE);
    const int isFdBlock = ((cp - 0xFDD0) < 32);

    return !(beyondRange || isSurrogate || isPlaneTail || isFdBlock);
}

/*







|

|

















|







2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
    }
    return (GetCategory(ch) == UPPERCASE_LETTER);
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_UniCharIsTransferable --
 *
 *	Decide whether a code point is a transferable Unicode character,
 *	i.e. neither out of range, nor a surrogate, nor a noncharacter.
 *
 * Results:
 *	Returns 1 when the code point is transferable, 0 when it is not.
 *
 *	Code points that are not transferable:
 *	  1) Anything above U+10FFFF (negative inputs land here too,
 *	     because the value is examined as an unsigned quantity)
 *	  2) The surrogate block U+D800 - U+DFFF
 *	  3) The final two code points of every plane (U+??FFFE, U+??FFFF)
 *	  4) The 32 code points U+FDD0 - U+FDEF
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_UniCharIsTransferable(
    int ch)			/* Unicode character to test. */
{
    unsigned int code = (unsigned int)ch;

    if (code > 0x10FFFF) {
	/* Outside the Unicode code space. */
	return 0;
    }
    if ((code & 0xFFF800) == 0xD800) {
	/* Surrogate. */
	return 0;
    }
    if ((code & 0xFFFE) == 0xFFFE) {
	/* Last two code points of a plane. */
	return 0;
    }
    if ((code - 0xFDD0) < 32) {
	/* Noncharacter block U+FDD0 - U+FDEF. */
	return 0;
    }
    return 1;
}

/*

Changes to tests/string.test.

534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
    list [catch {run {string is alpha -failin str}} msg] $msg
} {1 {wrong # args: should be "string is alpha ?-strict? ?-failindex var? str"}}
test string-6.4.$noComp {string is, too many args} {
    list [catch {run {string is alpha -failin var -strict str more}} msg] $msg
} {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
test string-6.5.$noComp {string is, class check} {
    list [catch {run {string is bogus str}} msg] $msg
} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
test string-6.6.$noComp {string is, ambiguous class} {
    list [catch {run {string is al str}} msg] $msg
} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
test string-6.7.$noComp {string is alpha, all ok} {
    run {string is alpha -strict -failindex var abc}
} 1
test string-6.8.$noComp {string is, error in var} {
    list [run {string is alpha -failindex var abc5def}] $var
} {0 3}
test string-6.9.$noComp {string is, var shouldn't get set} {







|


|







534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
    list [catch {run {string is alpha -failin str}} msg] $msg
} {1 {wrong # args: should be "string is alpha ?-strict? ?-failindex var? str"}}
test string-6.4.$noComp {string is, too many args} {
    list [catch {run {string is alpha -failin var -strict str more}} msg] $msg
} {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
test string-6.5.$noComp {string is, class check} {
    list [catch {run {string is bogus str}} msg] $msg
} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, transferable, true, upper, wideinteger, wordchar, or xdigit}}
test string-6.6.$noComp {string is, ambiguous class} {
    list [catch {run {string is al str}} msg] $msg
} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, transferable, true, upper, wideinteger, wordchar, or xdigit}}
test string-6.7.$noComp {string is alpha, all ok} {
    run {string is alpha -strict -failindex var abc}
} 1
test string-6.8.$noComp {string is, error in var} {
    list [run {string is alpha -failindex var abc5def}] $var
} {0 3}
test string-6.9.$noComp {string is, var shouldn't get set} {
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
} {0 87}
test string-6.130.1.$noComp {string is entier, false on bad octal} {
    list [run {string is entier -fail var 0o1234561123412345612345656234561234561234561234561234561234561234561234561234561234536963}] $var
} {0 87}
test string-6.131.$noComp {string is entier, false on bad hex} {
    list [run {string is entier -fail var 0X12345611234123456123456562345612345612345612345612345612345612345612345612345612345345XYZ}] $var
} {0 88}
test string-6.132.$noComp {string is unicode} {
    run {string is unicode \U10FFFD\uD7FF\uE000\uFDCF\uFDF0}
} 1
test string-6.133.$noComp {string is unicode, upper surrogate} {
    run {string is unicode \uD800}
} 0
test string-6.134.$noComp {string is unicode, lower surrogate} {
    run {string is unicode \uDFFF}
} 0
test string-6.135.$noComp {string is unicode, noncharacter} {
    run {string is unicode \uFFFE}
} 0
test string-6.136.$noComp {string is unicode, noncharacter} {
    run {string is unicode \uFFFF}
} 0
test string-6.137.$noComp {string is unicode, noncharacter} {
    run {string is unicode \uFDD0}
} 0
test string-6.138.$noComp {string is unicode, noncharacter} {
    run {string is unicode \uFDEF}
} 0
test string-6.139.$noComp {string is integer, bug [76ad7aeba3]} {
    run {string is integer 18446744073709551615}
} 1
test string-6.140.$noComp {string is integer, bug [76ad7aeba3]} {
    run {string is integer -18446744073709551615}
} 1







|
|

|
|

|
|

|
|

|
|

|
|

|
|







970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
} {0 87}
test string-6.130.1.$noComp {string is entier, false on bad octal} {
    list [run {string is entier -fail var 0o1234561123412345612345656234561234561234561234561234561234561234561234561234561234536963}] $var
} {0 87}
test string-6.131.$noComp {string is entier, false on bad hex} {
    list [run {string is entier -fail var 0X12345611234123456123456562345612345612345612345612345612345612345612345612345612345345XYZ}] $var
} {0 88}
test string-6.132.$noComp {string is transferable} {
    run {string is transferable \U10FFFD\uD7FF\uE000\uFDCF\uFDF0}
} 1
test string-6.133.$noComp {string is transferable, upper surrogate} {
    run {string is transferable \uD800}
} 0
test string-6.134.$noComp {string is transferable, lower surrogate} {
    run {string is transferable \uDFFF}
} 0
test string-6.135.$noComp {string is transferable, noncharacter} {
    run {string is transferable \uFFFE}
} 0
test string-6.136.$noComp {string is transferable, noncharacter} {
    run {string is transferable \uFFFF}
} 0
test string-6.137.$noComp {string is transferable, noncharacter} {
    run {string is transferable \uFDD0}
} 0
test string-6.138.$noComp {string is transferable, noncharacter} {
    run {string is transferable \uFDEF}
} 0
test string-6.139.$noComp {string is integer, bug [76ad7aeba3]} {
    run {string is integer 18446744073709551615}
} 1
test string-6.140.$noComp {string is integer, bug [76ad7aeba3]} {
    run {string is integer -18446744073709551615}
} 1