Tcl Source Code

Check-in [1916b6a72e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge core-8-branch. Also, use a different value for TCL_STUB_MAGIC when TCL_UTF_MAX>4.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | tip-389
Files: files | file ages | folders
SHA3-256: 1916b6a72ea524e888050e35295c93f7853e5733c6c4694fc6c5fb796423de74
User & Date: jan.nijtmans 2017-11-29 11:49:49.911
Context
2017-12-01
11:33
merge core-8-branch check-in: dabd924a87 user: jan.nijtmans tags: tip-389
2017-11-29
11:49
Merge core-8-branch. Also, use a different value for TCL_STUB_MAGIC when TCL_UTF_MAX>4. check-in: 1916b6a72e user: jan.nijtmans tags: tip-389
11:05
merge core-8-6-branch check-in: 8976a447aa user: jan.nijtmans tags: core-8-branch
09:49
Fix [8e1e31eac0]: lsort treats NUL chars strangely check-in: e2a6110884 user: jan.nijtmans tags: tip-389
Changes
Unified Diff Ignore Whitespace Patch
Changes to doc/ToUpper.3.
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
\fBTcl_UtfToLower\fR(\fIstr\fR)
.sp
int
\fBTcl_UtfToTitle\fR(\fIstr\fR)
.SH ARGUMENTS
.AS char *str in/out
.AP int ch in
The character to be converted.
.AP char *str in/out
Pointer to UTF-8 string to be converted in place.
.BE

.SH DESCRIPTION
.PP
The first three routines convert the case of individual Unicode characters:







|







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
\fBTcl_UtfToLower\fR(\fIstr\fR)
.sp
int
\fBTcl_UtfToTitle\fR(\fIstr\fR)
.SH ARGUMENTS
.AS char *str in/out
.AP int ch in
The Unicode character to be converted.
.AP char *str in/out
Pointer to UTF-8 string to be converted in place.
.BE

.SH DESCRIPTION
.PP
The first three routines convert the case of individual Unicode characters:
Changes to doc/UniCharIsAlpha.3.
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
.AS int ch
.AP int ch in
The character to be examined.
.BE

.SH DESCRIPTION
.PP
All of the routines described examine characters and return a
boolean value. A non-zero return value means that the character does
belong to the character class associated with the called routine. The
rest of this document just describes the character classes associated
with the various routines.

.SH "CHARACTER CLASSES"
.PP







|







49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
.AS int ch
.AP int ch in
The character to be examined.
.BE

.SH DESCRIPTION
.PP
All of the routines described examine Unicode characters and return a
boolean value. A non-zero return value means that the character does
belong to the character class associated with the called routine. The
rest of this document just describes the character classes associated
with the various routines.

.SH "CHARACTER CLASSES"
.PP
Changes to doc/Utf.3.
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
.SH ARGUMENTS
.AS "const Tcl_UniChar" *uniPattern in/out
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored.  At most
\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
Pointer to a UTF-8 string.
.AP "const char" *ct in







|







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
\fBTcl_UtfBackslash\fR(\fIsrc, readPtr, dst\fR)
.SH ARGUMENTS
.AS "const Tcl_UniChar" *uniPattern in/out
.AP char *buf out
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored.  At most
\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
Pointer to a UTF-8 string.
.AP "const char" *ct in
Changes to generic/tcl.h.
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404

/*
 * Function type that receives an array of ClientData values, the
 * interpreter, and an integer result, and returns an int.
 * NOTE(review): presumably the callback signature used for post-processing
 * in non-recursive (NR) evaluation -- confirm against the NRE documentation.
 */

typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp,
				int result);

/*
 *----------------------------------------------------------------------------
 * The following constant is used to test for older versions of Tcl in the
 * stubs tables. The hexadecimal literal is larger than 0x7FFFFFFF, so it is
 * explicitly cast to give the macro the type int.
 */

#define TCL_STUB_MAGIC		((int) 0xFCA3BACF)

/*
 * The following function is required to be defined in all stubs aware
 * extensions. The function is actually implemented in the stub library, not
 * the main Tcl library, although there is a trivial implementation in the
 * main library in case an extension is statically linked into an application.
 */







|


|







2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404

/*
 * Function type that receives an array of ClientData values, the
 * interpreter, and an integer result, and returns an int.
 * NOTE(review): presumably the callback signature used for post-processing
 * in non-recursive (NR) evaluation -- confirm against the NRE documentation.
 */

typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp,
				int result);

/*
 *----------------------------------------------------------------------------
 * The following constant is used to test for older versions of Tcl in the
 * stubs tables. If TCL_UTF_MAX>4 use a different value.
 *
 * The cast applies to the hexadecimal literal only; the comparison
 * (TCL_UTF_MAX>4) then adds 0 or 1 to it, so the two configurations end up
 * with magic numbers that differ by one.
 * NOTE(review): presumably this keeps code built with one TCL_UTF_MAX
 * setting from accepting a stubs table built with the other -- confirm.
 */

#define TCL_STUB_MAGIC		((int) 0xFCA3BACF + (TCL_UTF_MAX>4))

/*
 * The following function is required to be defined in all stubs aware
 * extensions. The function is actually implemented in the stub library, not
 * the main Tcl library, although there is a trivial implementation in the
 * main library in case an extension is statically linked into an application.
 */
Changes to generic/tclUtf.c.
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575

    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX == 4
	if (!len) {
	    len += TclUtfToUniChar(src, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	if (find == fullchar) {
	    return src;
	}
	if (*src == '\0') {
	    return NULL;
	}
	src += len;
    }







|


|







558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575

    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX == 4
	if (!len) {
	    len += TclUtfToUniChar(src, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
	}
#endif
	if (fullchar == ch) {
	    return src;
	}
	if (*src == '\0') {
	    return NULL;
	}
	src += len;
    }
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
    last = NULL;
    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX == 4
	if (!len) {
	    len += TclUtfToUniChar(src, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
	}
#endif
	if (find == fullchar) {
	    last = src;
	}
	if (*src == '\0') {
	    break;
	}
	src += len;
    }







|


|







606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
    last = NULL;
    while (1) {
	len = TclUtfToUniChar(src, &find);
	fullchar = find;
#if TCL_UTF_MAX == 4
	if (!len) {
	    len += TclUtfToUniChar(src, &find);
	    fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
	}
#endif
	if (fullchar == ch) {
	    last = src;
	}
	if (*src == '\0') {
	    break;
	}
	src += len;
    }
726
727
728
729
730
731
732




733
734


735
736

737








738
739
740
741
742
743
744
745

int
Tcl_UniCharAtIndex(
    register const char *src,	/* UTF-8 string to read from. */
    register int index)		/* Zero-based position of the wanted character. */
{
    Tcl_UniChar decoded = 0;
    int remaining;

    /*
     * Decode index+1 characters from the front of the string; the last one
     * decoded is the answer. A negative index decodes nothing, so the
     * initial value 0 is returned.
     */

    for (remaining = index; remaining >= 0; remaining--) {
	src += TclUtfToUniChar(src, &decoded);
    }
    return decoded;
}

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfAtIndex --
 *







>
>
>
>

|
>
>
|

>

>
>
>
>
>
>
>
>
|







726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760

int
Tcl_UniCharAtIndex(
    register const char *src,	/* The UTF-8 string to dereference. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;
    int fullchar;
#if TCL_UTF_MAX == 4
    int len = 1;		/* Bytes consumed by the most recent decode;
				 * stays non-zero if the loop never runs. */
#endif

    /*
     * Decode index+1 units; the last one decoded is the result. The
     * decrement is done inside the body rather than in the loop condition
     * so that a negative index is never decremented: with "index-- >= 0"
     * an index of INT_MIN would overflow, which is undefined behaviour.
     * A negative index decodes nothing and yields 0.
     */

    while (index >= 0) {
	index--;
#if TCL_UTF_MAX == 4
	len = TclUtfToUniChar(src, &ch);
	src += len;
#else
	src += TclUtfToUniChar(src, &ch);
#endif
    }
    fullchar = ch;
#if TCL_UTF_MAX == 4
    if (!len) {
	/*
	 * The last decode consumed no bytes, meaning the Tcl_UniChar it
	 * produced was a high surrogate. Decode once more to obtain the low
	 * surrogate and combine the pair into a single code point.
	 */

	(void)TclUtfToUniChar(src, &ch);
	fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
    }
#endif
    return fullchar;
}

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfAtIndex --
 *
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
const char *
Tcl_UtfAtIndex(
    register const char *src,	/* UTF-8 string to walk. */
    register int index)		/* Number of characters to step over. */
{
    Tcl_UniChar scratch = 0;	/* Receives each decoded character; the value
				 * itself is not used. */
    int remaining;

    /*
     * Advance the pointer by the byte length of each of the first 'index'
     * characters. A zero or negative index leaves the pointer untouched.
     */

    for (remaining = index; remaining > 0; remaining--) {
	src += TclUtfToUniChar(src, &scratch);
    }
    return src;
}

/*
 *---------------------------------------------------------------------------







|
<







773
774
775
776
777
778
779
780

781
782
783
784
785
786
787
const char *
Tcl_UtfAtIndex(
    register const char *src,	/* The UTF-8 string. */
    register int index)		/* The position of the desired character. */
{
    Tcl_UniChar ch = 0;		/* Receives each decoded character; only the
				 * byte count returned by the decoder is
				 * used here. */

    /*
     * Step over 'index' characters. The counter is only decremented after
     * the test has succeeded: with "index-- > 0" a value of INT_MIN would
     * be decremented once and overflow, which is undefined behaviour. A
     * zero or negative index returns src unchanged.
     */

    for (; index > 0; index--) {
	src += TclUtfToUniChar(src, &ch);
    }
    return src;
}

/*
 *---------------------------------------------------------------------------