Tcl Source Code

Changes On Branch tip-318-update
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch tip-318-update Excluding Merge-Ins

This is equivalent to a diff from 4d417791c9 to e01dd5837a

2012-11-05
14:42
TIP #413 check-in: 1763fa9dc9 user: jan.nijtmans tags: trunk, potential incompatibility
2012-10-23
22:07
unbreak Mac OSX build check-in: 38faf636a9 user: jan.nijtmans tags: trunk
21:52
merge trunk Closed-Leaf check-in: e01dd5837a user: jan.nijtmans tags: tip-318-update
21:50
Add "flags" parameter from Tcl_LoadFile to various internal functions, so these flags are availab... check-in: 4d417791c9 user: jan.nijtmans tags: trunk
21:45
Remove unused TclpLoadFile function. check-in: fa1b4b9d54 user: jan.nijtmans tags: core-8-5-branch
2012-10-18
17:38
* generic/tclBasic.c (TclNRCoroutineObjCmd): ensure that numlevels are properly set, fix bug discove... check-in: ad2663e692 user: mig tags: trunk
2012-10-16
14:25
Remove two characters, zero width non-joiner (U+200c) and zero width joiner (U+200d), which were fin... check-in: d02f58c491 user: jan.nijtmans tags: tip-318-update

Changes to doc/string.n.

145
146
147
148
149
150
151
152


153
154
155
156
157
158
159
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160







-
+
+







.IP \fBlower\fR 12
Any Unicode lower case alphabet character.
.IP \fBprint\fR 12
Any Unicode printing character, including space.
.IP \fBpunct\fR 12
Any Unicode punctuation character.
.IP \fBspace\fR 12
Any Unicode space character.
Any Unicode whitespace character, zero width space (U+200b),
word joiner (U+2060) and zero width no-break space (U+feff) (=BOM).
.IP \fBtrue\fR 12
Any of the forms allowed to \fBTcl_GetBoolean\fR where the value is
true.
.IP \fBupper\fR 12
Any upper case alphabet character in the Unicode character set.
.IP \fBwideinteger\fR 12
Any of the valid forms for a wide integer in Tcl, with optional
331
332
333
334
335
336
337
338
339


340
341
342
343
344
345
346


347
348
349
350
351
352
353


354
355
356
357
358
359
360
332
333
334
335
336
337
338


339
340
341
342
343
344
345


346
347
348
349
350
351
352


353
354
355
356
357
358
359
360
361







-
-
+
+





-
-
+
+





-
-
+
+







the string to stop at (inclusive).  \fIfirst\fR and \fIlast\fR may be
specified using the forms described in \fBSTRING INDICES\fR.
.TP
\fBstring trim \fIstring\fR ?\fIchars\fR?
.
Returns a value equal to \fIstring\fR except that any leading or
trailing characters present in the string given by \fIchars\fR are removed.  If
\fIchars\fR is not specified then white space is removed (spaces,
tabs, newlines, and carriage returns).
\fIchars\fR is not specified then white space is removed (any character
for which \fBstring is space\fR returns 1, and "\0").
.TP
\fBstring trimleft \fIstring\fR ?\fIchars\fR?
.
Returns a value equal to \fIstring\fR except that any leading
characters present in the string given by \fIchars\fR are removed.  If
\fIchars\fR is not specified then white space is removed (spaces,
tabs, newlines, and carriage returns).
\fIchars\fR is not specified then white space is removed (any character
for which \fBstring is space\fR returns 1, and "\0").
.TP
\fBstring trimright \fIstring\fR ?\fIchars\fR?
.
Returns a value equal to \fIstring\fR except that any trailing
characters present in the string given by \fIchars\fR are removed.  If
\fIchars\fR is not specified then white space is removed (spaces,
tabs, newlines, and carriage returns).
\fIchars\fR is not specified then white space is removed (any character
for which \fBstring is space\fR returns 1, and "\0").
.TP
\fBstring wordend \fIstring charIndex\fR
.
Returns the index of the character just after the last one in the word
containing character \fIcharIndex\fR of \fIstring\fR.  \fIcharIndex\fR
may be specified using the forms in \fBSTRING INDICES\fR.  A word is
considered to be any contiguous range of alphanumeric (Unicode letters

Changes to generic/regc_locale.c.

350
351
352
353
354
355
356
357

358
359
360
361
362
363


364
365
366
367
368
369
370
350
351
352
353
354
355
356

357
358
359
360
361
362

363
364
365
366
367
368
369
370
371







-
+





-
+
+







#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))

/*
 * Unicode: white space characters.
 */

static const crange spaceRangeTable[] = {
    {0x9, 0xd}, {0x2000, 0x200a}
    {0x9, 0xd}, {0x2000, 0x200b}
};

#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))

static const chr spaceCharTable[] = {
    0x20, 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000
    0x20, 0x85, 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f,
    0x2060, 0x3000, 0xfeff
};

#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))

/*
 * Unicode: lowercase characters.
 */

Changes to generic/tclCmdMZ.c.

30
31
32
33
34
35
36
37

38
39
40
41
42


























43
44
45
46
47
48
49
30
31
32
33
34
35
36

37


38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72







-
+
-
-


-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







static int		TryPostHandler(ClientData data[], Tcl_Interp *interp,
			    int result);
static int		UniCharIsAscii(int character);
static int		UniCharIsHexDigit(int character);

/*
 * Default set of characters to trim in [string trim] and friends. This is a
 * UTF-8 literal string containing space, tab, newline, carriage return,
 * UTF-8 literal string containing all Unicode space characters [TIP #413]
 * ethiopic wordspace (U+1361), ogham space mark (U+1680), and ideographic
 * space (U+3000). [TIP #318]
 */

#define DEFAULT_TRIM_SET " \t\n\r\xe1\x8d\xa1\xe1\x9a\x80\xe3\x80\x80"
#define DEFAULT_TRIM_SET \
	"\x09\x0a\x0b\x0c\x0d " /* ASCII */\
	"\xc0\x80" /*     nul (U+0000) */\
	"\xc2\x85" /*     next line (U+0085) */\
	"\xc2\xa0" /*     non-breaking space (U+00a0) */\
	"\xe1\x9a\x80" /* ogham space mark (U+1680) */ \
	"\xe1\xa0\x8e" /* mongolian vowel separator (U+180e) */\
	"\xe2\x80\x80" /* en quad (U+2000) */\
	"\xe2\x80\x81" /* em quad (U+2001) */\
	"\xe2\x80\x82" /* en space (U+2002) */\
	"\xe2\x80\x83" /* em space (U+2003) */\
	"\xe2\x80\x84" /* three-per-em space (U+2004) */\
	"\xe2\x80\x85" /* four-per-em space (U+2005) */\
	"\xe2\x80\x86" /* six-per-em space (U+2006) */\
	"\xe2\x80\x87" /* figure space (U+2007) */\
	"\xe2\x80\x88" /* punctuation space (U+2008) */\
	"\xe2\x80\x89" /* thin space (U+2009) */\
	"\xe2\x80\x8a" /* hair space (U+200a) */\
	"\xe2\x80\x8b" /* zero width space (U+200b) */\
	"\xe2\x80\xa8" /* line separator (U+2028) */\
	"\xe2\x80\xa9" /* paragraph separator (U+2029) */\
	"\xe2\x80\xaf" /* narrow no-break space (U+202f) */\
	"\xe2\x81\x9f" /* medium mathematical space (U+205f) */\
	"\xe2\x81\xa0" /* word joiner (U+2060) */\
	"\xe3\x80\x80" /* ideographic space (U+3000) */\
	"\xef\xbb\xbf" /* zero width no-break space (U+feff) */

/*
 *----------------------------------------------------------------------
 *
 * Tcl_PwdObjCmd --
 *
 *	This procedure is invoked to process the "pwd" Tcl command. See the

Changes to generic/tclUtf.c.

1512
1513
1514
1515
1516
1517
1518



1519
1520
1521
1522
1523
1524
1525
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528







+
+
+







    /*
     * If the character is within the ASCII range (below 0x80), just use the
     * standard C function, otherwise consult the Unicode table.
     */

    if (((Tcl_UniChar) ch) < ((Tcl_UniChar) 0x80)) {
	return isspace(UCHAR(ch)); /* INTL: ISO space */
    } else if ((Tcl_UniChar) ch == 0x0085 || (Tcl_UniChar) ch == 0x200b
	    || (Tcl_UniChar) ch == 0x2060 || (Tcl_UniChar) ch == 0xfeff) {
	return 1;
    } else {
	return ((SPACE_BITS >> GetCategory(ch)) & 1);
    }
}

/*
 *----------------------------------------------------------------------

Changes to tests/string.test.

1480
1481
1482
1483
1484
1485
1486
1487
1488


1489
1490
1491
1492
1493
1494
1495
1496
1497
1498


1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517


1518
1519
1520
1521
1522
1523
1524
1480
1481
1482
1483
1484
1485
1486


1487
1488
1489
1490
1491
1492
1493
1494
1495
1496


1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515


1516
1517
1518
1519
1520
1521
1522
1523
1524







-
-
+
+








-
-
+
+

















-
-
+
+







test string-18.10 {string trim} {
    string trim ABC DEF
} {ABC}
test string-18.11 {string trim, unicode} {
    string trim "\xe7\xe8 AB\xe7C \xe8\xe7" \xe7\xe8
} " AB\xe7C "
test string-18.12 {string trim, unicode default} {
    string trim ABC\u1361\u1680\u3000
} ABC
    string trim \ufeff\x00\u0085\u00a0\u1680\u180eABC\u1361\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u2028\u2029\u202f\u205f\u3000
} ABC\u1361

test string-19.1 {string trimleft} {
    list [catch {string trimleft} msg] $msg
} {1 {wrong # args: should be "string trimleft string ?chars?"}}
test string-19.2 {string trimleft} {
    string trimleft "    XYZ      "
} {XYZ      }
test string-19.3 {string trimleft, unicode default} {
    string trimleft \u1361\u1680\u3000ABC
} ABC
    string trimleft \ufeff\u0085\u00a0\x00\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u2028\u2029\u202f\u205f\u3000\u1361ABC
} \u1361ABC

test string-20.1 {string trimright errors} {
    list [catch {string trimright} msg] $msg
} {1 {wrong # args: should be "string trimright string ?chars?"}}
test string-20.2 {string trimright errors} {
    list [catch {string trimg a} msg] $msg
} {1 {unknown or ambiguous subcommand "trimg": must be bytelength, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, reverse, tolower, totitle, toupper, trim, trimleft, trimright, wordend, or wordstart}}
test string-20.3 {string trimright} {
    string trimright "    XYZ      "
} {    XYZ}
test string-20.4 {string trimright} {
    string trimright "   "
} {}
test string-20.5 {string trimright} {
    string trimright ""
} {}
test string-20.6 {string trimright, unicode default} {
    string trimright ABC\u1361\u1680\u3000
} ABC
    string trimright ABC\u1361\u0085\x00\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u200b\u2028\u2029\u202f\u205f\u3000
} ABC\u1361

test string-21.1 {string wordend} {
    list [catch {string wordend a} msg] $msg
} {1 {wrong # args: should be "string wordend string index"}}
test string-21.2 {string wordend} {
    list [catch {string wordend a b c} msg] $msg
} {1 {wrong # args: should be "string wordend string index"}}