Index: doc/expr.n ================================================================== --- doc/expr.n +++ doc/expr.n @@ -15,11 +15,11 @@ .SH SYNOPSIS \fBexpr \fIarg \fR?\fIarg arg ...\fR? .BE .SH DESCRIPTION .PP -Concatenates \fIarg\fRs, separated by a space, into an expression, and evaluates +The \fIexpr\fR command concatenates \fIarg\fRs, separated by a space, into an expression, and evaluates that expression, returning its value. The operators permitted in an expression include a subset of the operators permitted in C expressions. For those operators common to both Tcl and C, Tcl applies the same meaning and precedence as the corresponding C operators. @@ -44,26 +44,10 @@ .SS OPERANDS .PP An expression consists of a combination of operands, operators, parentheses and commas, possibly with whitespace between any of these elements, which is ignored. -An integer operand may be specified in decimal (the normal case, the optional -first two characters are \fB0d\fR), binary -(the first two characters are \fB0b\fR), octal -(the first two characters are \fB0o\fR), or hexadecimal -(the first two characters are \fB0x\fR) form. For -compatibility with older Tcl releases, an operand that begins with \fB0\fR is -interpreted as an octal integer even if the second character is not \fBo\fR. -A floating-point number may be specified in any of several -common decimal formats, and may use the decimal point \fB.\fR, -\fBe\fR or \fBE\fR for scientific notation, and -the sign characters \fB+\fR and \fB\-\fR. The -following are all valid floating-point numbers: 2.1, 3., 6e4, 7.91e+16. -The strings \fBInf\fR -and \fBNaN\fR, in any combination of case, are also recognized as floating point -values. An operand that doesn't have a numeric interpretation must be quoted -with either braces or with double quotes. .PP An operand may be specified in any of the following ways: .IP [1] As a numeric value, either integer or floating-point. .IP [2] @@ -101,10 +85,53 @@ \fBexpr\fR 3.1 + $a \fI6.1\fR \fBexpr\fR 2 + "$a.$b" \fI5.6\fR \fBexpr\fR 4*[llength "6 2"] \fI8\fR \fBexpr\fR {{word one} < "word $a"} \fI0\fR .CE +.PP +\fBInteger value\fR +.PP +An integer operand may be specified in decimal (the normal case, the optional +first two characters are \fB0d\fR), binary +(the first two characters are \fB0b\fR), octal +(the first two characters are \fB0o\fR), or hexadecimal +(the first two characters are \fB0x\fR) form. For +compatibility with older Tcl releases, an operand that begins with \fB0\fR is +interpreted as an octal integer even if the second character is not \fBo\fR. +.PP +\fBFloating-point value\fR +.PP +A floating-point number may be specified in any of several +common decimal formats, and may use the decimal point \fB.\fR, +\fBe\fR or \fBE\fR for scientific notation, and +the sign characters \fB+\fR and \fB\-\fR. The +following are all valid floating-point numbers: 2.1, 3., 6e4, 7.91e+16. +The strings \fBInf\fR +and \fBNaN\fR, in any combination of case, are also recognized as floating point +values. An operand that doesn't have a numeric interpretation must be quoted +with either braces or with double quotes. +.PP +\fBBoolean value\fR +.PP +A boolean value may be represented by any of the values \fB0\fR, \fBfalse\fR, \fBno\fR, +or \fBoff\fR and any of the values \fB1\fR, \fBtrue\fR, \fByes\fR, or \fBon\fR. +.PP +\fBDigit Separator\fR +.PP +Digits in any numeric value may be separated with one or more underscore +characters, "\fB_\fR", to improve readability. These separators may only +appear between digits. The separator may not appear at the start of a +numeric value, between the leading 0 and radix specifier, or at the +end of a numeric value. Here are some examples: +.PP +.CS +.ta 9c +\fBexpr\fR 100_000_000 \fI100000000\fR +\fBexpr\fR 0xffff_ffff \fI4294967295\fR +\fBformat\fR 0x%x 0b1111_1110_1101_1011 \fI0xfedb\fR +.CE +.PP .SS OPERATORS .PP For operators having both a numeric mode and a string mode, the numeric mode is chosen when all operands have a numeric interpretation. The integer interpretation of an operand is preferred over the floating-point @@ -472,15 +499,15 @@ .CE .SH "SEE ALSO" array(n), for(n), if(n), mathfunc(n), mathop(n), namespace(n), proc(n), string(n), Tcl(n), while(n) .SH KEYWORDS -arithmetic, boolean, compare, expression, fuzzy comparison +arithmetic, boolean, compare, expression, fuzzy comparison, integer value .SH COPYRIGHT .nf Copyright \(co 1993 The Regents of the University of California. Copyright \(co 1994-2000 Sun Microsystems Incorporated. Copyright \(co 2005 by Kevin B. Kenny . All rights reserved. .fi '\" Local Variables: '\" mode: nroff '\" End: Index: doc/string.n ================================================================== --- doc/string.n +++ doc/string.n @@ -503,10 +503,10 @@ } .CE .SH "SEE ALSO" expr(n), list(n) .SH KEYWORDS -case conversion, compare, index, match, pattern, string, word, equal, +case conversion, compare, index, integer value, match, pattern, string, word, equal, ctype, character, reverse .\" Local Variables: .\" mode: nroff .\" End: Index: generic/tclInt.h ================================================================== --- generic/tclInt.h +++ generic/tclInt.h @@ -2701,10 +2701,12 @@ * prefixes. */ #define TCL_PARSE_NO_WHITESPACE 32 /* Reject leading/trailing whitespace. */ #define TCL_PARSE_BINARY_ONLY 64 /* Parse binary even without prefix. */ +#define TCL_PARSE_NO_UNDERSCORE 128 + /* Reject underscore digit separator */ /* *---------------------------------------------------------------------- * Type values TclGetNumberFromObj *---------------------------------------------------------------------- Index: generic/tclScan.c ================================================================== --- generic/tclScan.c +++ generic/tclScan.c @@ -900,11 +900,11 @@ Tcl_IncrRefCount(objPtr); if (width == 0) { width = ~0; } if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width, - &end, TCL_PARSE_INTEGER_ONLY | parseFlag)) { + &end, TCL_PARSE_INTEGER_ONLY | TCL_PARSE_NO_UNDERSCORE | parseFlag)) { Tcl_DecrRefCount(objPtr); if (width < 0) { if (*end == '\0') { underflow = 1; } @@ -1004,11 +1004,11 @@ Tcl_IncrRefCount(objPtr); if (width == 0) { width = ~0; } if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width, - &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE)) { + &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE | TCL_PARSE_NO_UNDERSCORE)) { Tcl_DecrRefCount(objPtr); if (width < 0) { if (*end == '\0') { underflow = 1; } Index: generic/tclStrToD.c ================================================================== --- generic/tclStrToD.c +++ generic/tclStrToD.c @@ -527,11 +527,13 @@ int status = TCL_OK; /* Status to return to caller. */ char d = 0; /* Last hexadecimal digit scanned; initialized * to avoid a compiler warning. */ int shift = 0; /* Amount to shift when accumulating binary */ int explicitOctal = 0; - + int under = 0; /* Flag trailing '_' as error if true once + * number is accepted. */ + #define ALL_BITS ((Tcl_WideUInt)-1) #define MOST_BITS (ALL_BITS >> 1) /* * Initialize bytes to start of the object's string rep if the caller @@ -635,11 +637,11 @@ acceptState = state; acceptPoint = p; acceptLen = len; if (c == 'x' || c == 'X') { - if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY)) { + if (flags & (TCL_PARSE_OCTAL_ONLY|TCL_PARSE_BINARY_ONLY) || under) { goto endgame; } state = ZERO_X; break; } @@ -648,25 +650,31 @@ } if (flags & TCL_PARSE_SCAN_PREFIXES) { goto zeroo; } if (c == 'b' || c == 'B') { - if (flags & TCL_PARSE_OCTAL_ONLY) { + if ((flags & TCL_PARSE_OCTAL_ONLY) || under) { goto endgame; } state = ZERO_B; break; } if (flags & TCL_PARSE_BINARY_ONLY) { goto zerob; } if (c == 'o' || c == 'O') { + if (under) { + goto endgame; + } explicitOctal = 1; state = ZERO_O; break; } if (c == 'd' || c == 'D') { + if (under) { + goto endgame; + } state = ZERO_D; break; } #ifdef TCL_NO_DEPRECATED goto decimal; @@ -686,13 +694,15 @@ /* FALLTHROUGH */ case ZERO_O: zeroo: if (c == '0') { numTrailZeros++; + under = 0; state = OCTAL; break; } else if (c >= '1' && c <= '7') { + under = 0; if (objPtr != NULL) { shift = 3 * (numTrailZeros + 1); significandOverflow = AccumulateDecimalDigit( (unsigned)(c-'0'), numTrailZeros, &significandWide, &significandBig, @@ -731,10 +741,14 @@ numSigDigs = 1; } numTrailZeros = 0; state = OCTAL; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } /* FALLTHROUGH */ case BAD_OCTAL: if (explicitOctal) { @@ -759,10 +773,11 @@ * still be floating point. */ if (c == '0') { numTrailZeros++; + under = 0; state = BAD_OCTAL; break; } else if (isdigit(UCHAR(c))) { if (objPtr != NULL) { significandOverflow = AccumulateDecimalDigit( @@ -774,16 +789,19 @@ numSigDigs += (numTrailZeros + 1); } else { numSigDigs = 1; } numTrailZeros = 0; + under = 0; state = BAD_OCTAL; break; } else if (c == '.') { + under = 0; state = FRACTION; break; } else if (c == 'E' || c == 'e') { + under = 0; state = EXPONENT_START; break; } #endif goto endgame; @@ -802,18 +820,26 @@ case ZERO_X: zerox: if (c == '0') { numTrailZeros++; + under = 0; state = HEXADECIMAL; break; } else if (isdigit(UCHAR(c))) { + under = 0; d = (c-'0'); } else if (c >= 'A' && c <= 'F') { + under = 0; d = (c-'A'+10); } else if (c >= 'a' && c <= 'f') { + under = 0; d = (c-'a'+10); + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } else { goto endgame; } if (objPtr != NULL) { shift = 4 * (numTrailZeros + 1); @@ -849,12 +875,17 @@ acceptLen = len; case ZERO_B: zerob: if (c == '0') { numTrailZeros++; + under = 0; state = BINARY; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } else if (c != '1') { goto endgame; } if (objPtr != NULL) { shift = numTrailZeros + 1; @@ -884,14 +915,21 @@ state = BINARY; break; case ZERO_D: if (c == '0') { + under = 0; numTrailZeros++; } else if ( ! isdigit(UCHAR(c))) { + if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; + } goto endgame; } + under = 0; state = DECIMAL; flags |= TCL_PARSE_INTEGER_ONLY; /* FALLTHROUGH */ case DECIMAL: @@ -906,10 +944,11 @@ acceptState = state; acceptPoint = p; acceptLen = len; if (c == '0') { numTrailZeros++; + under = 0; state = DECIMAL; break; } else if (isdigit(UCHAR(c))) { if (objPtr != NULL) { significandOverflow = AccumulateDecimalDigit( @@ -917,18 +956,25 @@ &significandWide, &significandBig, significandOverflow); } numSigDigs += numTrailZeros+1; numTrailZeros = 0; + under = 0; state = DECIMAL; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } else if (flags & TCL_PARSE_INTEGER_ONLY) { goto endgame; } else if (c == '.') { + under = 0; state = FRACTION; break; } else if (c == 'E' || c == 'e') { + under = 0; state = EXPONENT_START; break; } goto endgame; @@ -950,10 +996,11 @@ case LEADING_RADIX_POINT: if (c == '0') { numDigitsAfterDp++; numTrailZeros++; + under = 0; state = FRACTION; break; } else if (isdigit(UCHAR(c))) { numDigitsAfterDp++; if (objPtr != NULL) { @@ -966,12 +1013,17 @@ numSigDigs += numTrailZeros+1; } else { numSigDigs = 1; } numTrailZeros = 0; + under = 0; state = FRACTION; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; case EXPONENT_START: /* @@ -979,14 +1031,16 @@ * character follows before using the C library strtol routine, * which allows whitespace. */ if (c == '+') { + under = 0; state = EXPONENT_SIGNUM; break; } else if (c == '-') { exponentSignum = 1; + under = 0; state = EXPONENT_SIGNUM; break; } /* FALLTHROUGH */ @@ -996,12 +1050,17 @@ * character. */ if (isdigit(UCHAR(c))) { exponent = c - '0'; + under = 0; state = EXPONENT; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; case EXPONENT: /* @@ -1016,12 +1075,17 @@ if (exponent < (LONG_MAX - 9) / 10) { exponent = 10 * exponent + (c - '0'); } else { exponent = LONG_MAX; } + under = 0; state = EXPONENT; break; + } else if (c == '_' && !(flags & TCL_PARSE_NO_UNDERSCORE)) { + /* Ignore numeric "white space" */ + under = 1; + break; } goto endgame; /* * Parse out INFINITY by simply spelling it out. INF is accepted @@ -1028,49 +1092,56 @@ * as an abbreviation; other prefices are not. */ case sI: if (c == 'n' || c == 'N') { + under = 0; state = sIN; break; } goto endgame; case sIN: if (c == 'f' || c == 'F') { + under = 0; state = sINF; break; } goto endgame; case sINF: acceptState = state; acceptPoint = p; acceptLen = len; + under = 0; if (c == 'i' || c == 'I') { state = sINFI; break; } goto endgame; case sINFI: if (c == 'n' || c == 'N') { + under = 0; state = sINFIN; break; } goto endgame; case sINFIN: if (c == 'i' || c == 'I') { + under = 0; state = sINFINI; break; } goto endgame; case sINFINI: if (c == 't' || c == 'T') { + under = 0; state = sINFINIT; break; } goto endgame; case sINFINIT: if (c == 'y' || c == 'Y') { + under = 0; state = sINFINITY; break; } goto endgame; @@ -1078,25 +1149,28 @@ * Parse NaN's. */ #ifdef IEEE_FLOATING_POINT case sN: if (c == 'a' || c == 'A') { + under = 0; state = sNA; break; } goto endgame; case sNA: if (c == 'n' || c == 'N') { + under = 0; state = sNAN; break; } goto endgame; case sNAN: acceptState = state; acceptPoint = p; acceptLen = len; if (c == '(') { + under = 0; state = sNANPAREN; break; } goto endgame; @@ -1103,16 +1177,18 @@ /* * Parse NaN(hexdigits) */ case sNANHEX: if (c == ')') { + under = 0; state = sNANFINISH; break; } /* FALLTHROUGH */ case sNANPAREN: if (TclIsSpaceProc(c)) { + under = 0; break; } if (numSigDigs < 13) { if (c >= '0' && c <= '9') { d = c - '0'; @@ -1123,10 +1199,11 @@ } else { goto endgame; } numSigDigs++; significandWide = (significandWide << 4) + d; + under = 0; state = sNANHEX; break; } goto endgame; case sNANFINISH: @@ -1135,10 +1212,11 @@ case sINFINITY: acceptState = state; acceptPoint = p; acceptLen = len; goto endgame; + } p++; len--; } @@ -1152,15 +1230,18 @@ if (endPtrPtr != NULL) { *endPtrPtr = p; } } else { /* - * Back up to the last accepting state in the lexer. + * Back up to the last accepting state in the lexer. + * If the last char seen is the numeric whitespace character '_', + * backup to that. */ - p = acceptPoint; - len = acceptLen; + p = under ? acceptPoint-1 : acceptPoint; + len = under ? acceptLen-1 : acceptLen; + if (!(flags & TCL_PARSE_NO_WHITESPACE)) { /* * Accept trailing whitespace. */ Index: tests/get.test ================================================================== --- tests/get.test +++ tests/get.test @@ -107,10 +107,16 @@ lmap x {0 0.0 " .0" ".0 " " 0e0 " "07" "- 0" "-0" "0o12" "0b10"} { catch {testdoubleobj set 1 $x} x set x } } {0.0 0.0 0.0 0.0 0.0 7.0 {expected floating-point number but got "- 0"} 0.0 10.0 2.0} +test get-3.5 {tcl_GetInt with numeric whitespace (i.e. '_')} testgetint { + lmap x {0_0 " 1_0" "0_2 " " 3_3 " 14__23__32___4 " 0x_a " " 0_07 " " 0o_1_0 " " 0_b1_0 " _33 42_ 0_x15 0_o17 0_d19 } { + catch {testgetint $x} x + set x + } +} {0 10 2 33 1423324 10 7 8 {expected integer but got " 0_b1_0 "} {expected integer but got "_33"} {expected integer but got "42_"} {expected integer but got "0_x15"} {expected integer but got "0_o17"} {expected integer but got "0_d19"}} # cleanup ::tcltest::cleanupTests return Index: tests/scan.test ================================================================== --- tests/scan.test +++ tests/scan.test @@ -553,10 +553,15 @@ set a {}; } -body { list [scan "207698809136909011942886895" \ %llu a] $a } -result {1 207698809136909011942886895} +test scan-5.20 {ignore digit separators} -setup { + set a {}; set b {}; set c {}; +} -body { + list [scan "10_23_45" %d_%d_%d a b c] $a $b $c +} -result {3 10 23 45} test scan-6.1 {floating-point scanning} -setup { set a {}; set b {}; set c {}; set d {} } -body { list [scan "2.1 -3.0e8 .99962 a" "%f%g%e%f" a b c d] $a $b $c $d @@ -598,10 +603,15 @@ test scan-6.8 {floating-point scanning} -setup { set a {}; set b {}; set c {}; set d {} } -body { list [scan "4.6 5.2" "%f %f %f %f" a b c d] $a $b $c $d } -result {2 4.6 5.2 {} {}} +test scan-6.8 {disallow diget separator in floating-point} -setup { + set a {}; set b {}; set c {}; +} -body { + list [scan "3.14_2.35_98.6" %f_%f_%f a b c ] $a $b $c +} -result {3 3.14 2.35 98.6} test scan-7.1 {string and character scanning} -setup { set a {}; set b {}; set c {}; set d {} } -body { list [scan "abc defghijk dum " "%s %3s %20s %s" a b c d] $a $b $c $d