Index: generic/tcl.h ================================================================== --- generic/tcl.h +++ generic/tcl.h @@ -1973,15 +1973,11 @@ * necessary. */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 #define TCL_ENCODING_PROFILE_REPLACE 0x03000000 -#if TCL_MAJOR_VERSION < 9 -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#endif +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_STRICT /* * The following definitions are the error codes returned by the conversion * routines: * Index: generic/tclCmdAH.c ================================================================== --- generic/tclCmdAH.c +++ generic/tclCmdAH.c @@ -433,11 +433,11 @@ static const char *const options[] = {"-profile", "-failindex", NULL}; enum convertfromOptions { PROFILE, FAILINDEX } optIndex; Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; - int profile = TCL_ENCODING_PROFILE_TCL8; + int profile = TCL_ENCODING_PROFILE_STRICT; /* * Possible combinations: * 1) data -> objc = 2 * 2) ?options? encoding data -> objc >= 3 Index: generic/tclEncoding.c ================================================================== --- generic/tclEncoding.c +++ generic/tclEncoding.c @@ -1152,11 +1152,11 @@ * encoding-specific string length. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { Tcl_ExternalToUtfDStringEx( - NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_STRICT, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } /* Index: generic/tclIO.c ================================================================== --- generic/tclIO.c +++ generic/tclIO.c @@ -1662,10 +1662,14 @@ tmp = (char *)Tcl_Alloc(7); tmp[0] = '\0'; } statePtr->channelName = tmp; statePtr->flags = mask; + /* uncomment this to make default encoding error handling strict */ + /* + statePtr->flags |= CHANNEL_ENCODING_STRICT; + */ statePtr->maxPerms = mask; /* Save max privileges for close callback */ /* * Set the channel to system default encoding. * @@ -1679,15 +1683,15 @@ name = Tcl_GetEncodingName(NULL); statePtr->encoding = Tcl_GetEncoding(NULL, name); statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, - TCL_ENCODING_PROFILE_TCL8); + TCL_ENCODING_PROFILE_DEFAULT); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, - TCL_ENCODING_PROFILE_TCL8); + TCL_ENCODING_PROFILE_DEFAULT); /* * Set the channel up initially in AUTO input translation mode to accept * "\n", "\r" and "\r\n". Output translation mode is set to a platform * specific default value. The eofChar is set to 0 for both input and @@ -5929,10 +5933,11 @@ if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); return -1; } /* * Early out when next read will see eofchar. @@ -6029,10 +6034,11 @@ */ if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_STICKY_EOF) && (!GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + copied = -1; goto finish; } } if (copiedNow < 0) { @@ -6108,10 +6114,13 @@ ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); Tcl_SetErrno(EILSEQ); copied = -1; } TclChannelRelease((Tcl_Channel)chanPtr); + if (copied == TCL_INDEX_NONE) { + ResetFlag(statePtr, CHANNEL_ENCODING_ERROR|CHANNEL_EOF); + } return copied; } /* *--------------------------------------------------------------------------- Index: generic/tclIOCmd.c ================================================================== --- generic/tclIOCmd.c +++ generic/tclIOCmd.c @@ -281,11 +281,11 @@ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Channel chan; /* The channel to read from. */ Tcl_Size lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *linePtr, *chanObjPtr; + Tcl_Obj *linePtr, *chanObjPtr, *resultDictPtr, *returnOptsPtr; int code = TCL_OK; if ((objc != 2) && (objc != 3)) { Tcl_WrongNumArgs(interp, 1, objv, "channelId ?varName?"); return TCL_ERROR; @@ -304,11 +304,10 @@ TclChannelPreserve(chan); TclNewObj(linePtr); lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen == TCL_IO_FAILURE) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { - Tcl_DecrRefCount(linePtr); /* * TIP #219. * Capture error messages put by the driver into the bypass area * and put them into the regular interpreter result. Fall back to @@ -318,11 +317,18 @@ if (!TclChanCaughtErrorBypass(interp, chan)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + resultDictPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) + , linePtr); + returnOptsPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) + , resultDictPtr); code = TCL_ERROR; + Tcl_SetReturnOptions(interp, returnOptsPtr); goto done; } lineLen = TCL_IO_FAILURE; } if (objc == 3) { @@ -369,11 +375,11 @@ Tcl_Channel chan; /* The channel to read from. */ int newline, i; /* Discard newline at end? */ Tcl_WideInt toRead; /* How many bytes to read? */ Tcl_Size charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *resultPtr, *chanObjPtr; + Tcl_Obj *resultPtr, *resultDictPtr, *returnOptsPtr, *chanObjPtr; if ((objc != 2) && (objc != 3)) { Interp *iPtr; argerror: @@ -431,11 +437,10 @@ TclNewObj(resultPtr); TclChannelPreserve(chan); charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0); if (charactersRead == TCL_IO_FAILURE) { - Tcl_DecrRefCount(resultPtr); /* * TIP #219. * Capture error messages put by the driver into the bypass area and * put them into the regular interpreter result. Fall back to the * regular message if nothing was found in the bypass. @@ -444,11 +449,18 @@ if (!TclChanCaughtErrorBypass(interp, chan)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + resultDictPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) + , resultPtr); + returnOptsPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) + , resultDictPtr); TclChannelRelease(chan); + Tcl_SetReturnOptions(interp, returnOptsPtr); return TCL_ERROR; } /* * If requested, remove the last newline in the channel if at EOF. Index: generic/tclIOUtil.c ================================================================== --- generic/tclIOUtil.c +++ generic/tclIOUtil.c @@ -1731,10 +1731,15 @@ } if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) != TCL_OK) { Tcl_CloseEx(interp,chan,0); return result; + } + if (Tcl_SetChannelOption(interp, chan, "-profile", "strict") + != TCL_OK) { + Tcl_CloseEx(interp,chan,0); + return result; } TclNewObj(objPtr); Tcl_IncrRefCount(objPtr); Index: library/http/http.tcl ================================================================== --- library/http/http.tcl +++ library/http/http.tcl @@ -1744,13 +1744,10 @@ if {$delay > 3000} { Log socket delay $delay - token $token } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 - } ##Log socket opened, DONE fconfigure - token $token } Log "Using $sock for $state(socketinfo) - token $token" \ [expr {$state(-keepalive)?"keepalive":""}] @@ -2165,13 +2162,10 @@ # Initialisation to {auto *} now done in geturl, KeepSocket and DoneRequest. # We are concerned here with the request (write) not the response (read). lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 - } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. catch {fconfigure $sock -blocking off} @@ -2558,13 +2552,10 @@ #Log ---- $state(socketinfo) >> conn to $token for HTTP response lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 - } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token if {[info exists state(-handler)] || [info exists state(-progress)]} { fileevent $sock readable [list http::EventGateway $sock $token] @@ -4552,15 +4543,11 @@ # IANA charset. However, we only know how to convert what we have # encodings for. set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { - if {[package vsatisfies [package provide Tcl] 9.0-]} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] - } else { - set state(body) [encoding convertfrom $enc $state(body)] - } + set state(body) [encoding convertfrom $enc $state(body)] } # Translate text line endings. set state(body) [string map {\r\n \n \r \n} $state(body)] } @@ -4639,15 +4626,11 @@ } set enc [CharsetToEncoding $res] if {$enc eq "binary"} { return 0 } - if {[package vsatisfies [package provide Tcl] 9.0-]} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] - } else { - set state(body) [encoding convertfrom $enc $state(body)] - } + set state(body) [encoding convertfrom $enc $state(body)] set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml set state(binary) 0 set state(charset) $res return 1 @@ -4724,15 +4707,11 @@ # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - if {[package vsatisfies [package provide Tcl] 9.0-]} { - set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] - } else { - set string [encoding convertto $http(-urlencoding) $string] - } + set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } # http::ProxyRequired -- # Default proxy filter. Index: library/tcltest/tcltest.tcl ================================================================== --- library/tcltest/tcltest.tcl +++ library/tcltest/tcltest.tcl @@ -398,11 +398,11 @@ set outputChannel $filename } default { set outputChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $outputChannel -profile tcl8 -encoding utf-8 + fconfigure $outputChannel -encoding utf-8 } set ChannelsWeOpened($outputChannel) 1 # If we created the file in [temporaryDirectory], then # [cleanupTests] will delete it, unless we claim it was @@ -445,11 +445,11 @@ set errorChannel $filename } default { set errorChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $errorChannel -profile tcl8 -encoding utf-8 + fconfigure $errorChannel -encoding utf-8 } set ChannelsWeOpened($errorChannel) 1 # If we created the file in [temporaryDirectory], then # [cleanupTests] will delete it, unless we claim it was @@ -790,11 +790,11 @@ proc ReadLoadScript {args} { variable Option if {$Option(-loadfile) eq {}} {return} set tmp [open $Option(-loadfile) r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $tmp -profile tcl8 -encoding utf-8 + fconfigure $tmp -encoding utf-8 } loadScript [read $tmp] close $tmp } Option -loadfile {} { @@ -1131,10 +1131,11 @@ if {[catch {testConstraint $n2 [eval [ConstraintInitializer $n2]]}]} { testConstraint $n2 0 } } } + # tcltest::Asciify -- # # Transforms the passed string to contain only printable ascii characters. # Useful for printing to terminals. Non-printables are mapped to @@ -1369,11 +1370,11 @@ ConstraintInitializer stdio { set code 0 if {![catch {set f [open "|[list [interpreter]]" w]}]} { if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -profile tcl8 -encoding utf-8 + fconfigure $f -encoding utf-8 } if {![catch {puts $f exit}]} { if {![catch {close $f}]} { set code 1 } @@ -2219,11 +2220,11 @@ } else { set testFile [file normalize [uplevel 1 {info script}]] if {[file readable $testFile]} { set testFd [open $testFile r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $testFd -profile tcl8 -encoding utf-8 + fconfigure $testFd -encoding utf-8 } set testLine [expr {[lsearch -regexp \ [split [read $testFd] "\n"] \ "^\[ \t\]*test [string map {. \\.} $name] "] + 1}] close $testFd @@ -2250,15 +2251,11 @@ } if {$processTest && $scriptFailure} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - if {[catch { - puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" - } errMsg]} { - puts [outputChannel] "\n---- Result was:\n" - } + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" puts [outputChannel] "---- Result should have been\ ($match matching):\n[Asciify $result]" } } if {$errorCodeFailure} { @@ -2934,11 +2931,11 @@ set cmd [linsert $childargv 0 | $shell $file] if {[catch { incr numTestFiles set pipeFd [open $cmd "r"] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $pipeFd -profile tcl8 -encoding utf-8 + fconfigure $pipeFd -encoding utf-8 } while {[gets $pipeFd line] >= 0} { if {[regexp [join { {^([^:]+):\t} {Total\t([0-9]+)\t} @@ -3134,11 +3131,11 @@ putting ``$contents'' into $fullName" set fd [open $fullName w] fconfigure $fd -translation lf if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $fd -profile tcl8 -encoding utf-8 + fconfigure $fd -encoding utf-8 } if {[string index $contents end] eq "\n"} { puts -nonewline $fd $contents } else { puts $fd $contents @@ -3285,11 +3282,11 @@ set directory [temporaryDirectory] } set fullName [file join $directory $name] set f [open $fullName] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -profile tcl8 -encoding utf-8 + fconfigure $f -encoding utf-8 } set data [read -nonewline $f] close $f return $data } Index: tests/cmdAH.test ================================================================== --- tests/cmdAH.test +++ tests/cmdAH.test @@ -332,11 +332,10 @@ encoding system iso8859-1 encoding system } -cleanup { encoding system $system } -result iso8859-1 - # # encoding convertfrom 4.3.* # Odd number of args is always invalid since last two args # are ENCODING DATA and all options take a value Index: tests/encoding.test ================================================================== --- tests/encoding.test +++ tests/encoding.test @@ -777,13 +777,16 @@ close $f removeFile iso2022.tcl list $count [viewable $line] } [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] -test encoding-24.4 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x80"] -} 1 +test {encoding-24.4 utf-8 invalid strict} {Parse invalid utf-8, strict} -body { + string length [encoding convertfrom -profile strict utf-8 "\xC0\x80"] +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test {encoding-24.4 utf-8 invalid tcl8} {UtfToUtfProc utf-8} { + encoding convertfrom -profile tcl8 utf-8 \xC0\x80 +} \x00 test encoding-24.5 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"] @@ -810,10 +813,13 @@ encoding convertfrom -profile strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.14 {Parse valid utf-8} { expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"} } 1 +test encoding-24.15.default {Parse invalid utf-8, default} -body { + encoding convertfrom -profile strict utf-8 "Z\xE0\x80" +} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'" test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body { encoding convertfrom -profile strict utf-8 "Z\xE0\x80" } -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'" test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" @@ -830,10 +836,13 @@ test encoding-24.19.1 {Parse valid or invalid utf-8} -body { encoding convertto -profile tcl8 utf-8 "ZX\uD800" } -result ZX\xED\xA0\x80 test encoding-24.19.2 {Parse valid or invalid utf-8} -body { encoding convertto -profile strict utf-8 "ZX\uD800" +} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" +test encoding-24.19.3 {Parse valid or invalid utf-8} -body { + encoding convertto utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { encoding convertfrom -profile tcl8 "\x20" } -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { @@ -867,11 +876,11 @@ encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.32 {Try to generate invalid utf-8} -body { +test encoding-24.33 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.33 {Try to generate invalid utf-8} -body { encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF Index: tests/encodingVectors.tcl ================================================================== --- tests/encodingVectors.tcl +++ tests/encodingVectors.tcl @@ -8,11 +8,11 @@ # vectors. # # List of defined encoding profiles set encProfiles {tcl8 strict replace} -set encDefaultProfile tcl8; # Should reflect the default from implementation +set encDefaultProfile strict; # Should reflect the default from implementation # encValidStrings - Table of valid strings. # # Each row is # The pair should be unique for generated test ids to be unique. Index: tests/io.test ================================================================== --- tests/io.test +++ tests/io.test @@ -1612,10 +1612,50 @@ fconfigure $f -encoding utf-8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c } 160 + + +apply [list {} { + set template { + test {io-12.9 @variant@} {ReadChars: multibyte chars split, default (strict)} -body { + set res {} + set f [open $path(test1) w] + fconfigure $f -translation binary + puts -nonewline $f [string repeat a 9]\xC2 + close $f + set f [open $path(test1)] + fconfigure $f -encoding utf-8 @strict@ -buffersize 10 + set status [catch {read $f} cres copts] + set in [dict get $copts -result] + lappend res $in + lappend res $status $cres + set status [catch {read $f} cres copts] + set in [dict get $copts -result] + lappend res $in + lappend res $status $cres + set res + } -cleanup { + catch {close $f} + } -match glob -result {{read aaaaaaaaa} 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}\ + {read {}} 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} + } + + # if default encoding is not currently to strict + # foreach variant {default encodingstrict} strict {{} {-encodingstrict 1}} + foreach variant {{profile strict}} strict {{-profile strict}} { + set script [string map [ + list @variant@ $variant @strict@ $strict] $template] + uplevel 1 $script + } +} [namespace current]] + + + test {io-12.9 profile tcl8} {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 close $f @@ -1625,10 +1665,11 @@ read $f scan [string index $in end] %c } -cleanup { catch {close $f} } -result 194 + test {io-12.10 strict} {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 close $f @@ -5877,11 +5918,11 @@ lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation] close $sock set l } {{} auto} test io-39.24 {Tcl_SetChannelOption, server socket is not readable or - writable so we can't change -eofchar or -translation } { + writable so we can't change -eofchar or -translation } { set l [list] set sock [socket -server [namespace code accept] -myaddr 127.0.0.1 0] fconfigure $sock -eofchar D -translation lf lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation] close $sock @@ -9278,11 +9319,33 @@ } -cleanup { close $f removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup { +test io-75.6.read {invalid utf-8 encoding, read is not ignored (-encodingstrict 1)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf \ + -profile strict +} -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + binary scan $d H* hd + lappend hd $status $cres +} -cleanup { + close $f + removeFile io-75.6 +} -match glob -result {41 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} + + +test io-75.6.gets {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary # \x81 is an incomplete byte sequence in utf-8 puts -nonewline $f A\x81 @@ -9296,11 +9359,11 @@ close $f removeFile io-75.6 } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} -test io-75.7 { +test io-75.7.gets { invalid utf-8 encoding gets is not ignored (-profile strict) } -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary @@ -9316,33 +9379,95 @@ close $f removeFile io-75.7 } -match glob -result {1 {error reading "file*":\ invalid or incomplete multibyte or wide character}} -test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { +test io-75.7.read {invalid utf-8 encoding eof handling (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. + puts -nonewline $f A\xA1\x1A + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \ + -translation lf -profile strict +} -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + binary scan $d H* hd + lappend hd [eof $f] + lappend hd $status + lappend hd $cres + fconfigure $f -encoding iso8859-1 + lappend hd [read $f];# We changed encoding, so now we can read the \xA1 + close $f + set hd +} -cleanup { + removeFile io-75.7 +} -match glob -result {41 0 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character} ¡} + +test io-75.8.incomplete { + incomplete uft-8 char after eof char is not an error (-profile strict) +} -setup { + set hd {} set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes - # precedence. + # \x81 is invalid in utf-8, but since the eof character \x1A comes first, + # -eofchar takes precedence. puts -nonewline $f A\x1A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \ -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd lappend hd [eof $f] + # there should be no error on additional reads lappend hd [read $f] close $f set hd } -cleanup { removeFile io-75.8 } -result {41 1 {}} -test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup { + +test {io-75.8 {invalid after eof}} { + invalid utf-8 after eof char is not an error (-profile strict) +} -setup { + set res {} + set fn [makeFile {} io-75.8] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A + # appears first, it's not an error. + puts -nonewline $f A\x1a\xc0\x80 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \ + -translation lf -profile strict +} -body { + set d [read $f] + foreach char [split $d {}] { + lappend res [format %x [scan $char %c]] + } + lappend res [eof $f] + # there should be no error on additional reads + lappend res [read $f] + close $f + set res +} -cleanup { + removeFile io-75.8 +} -result {41 1 {}} + + +test {io-75.8 {invalid before eof}} { + invalid utf-8 encoding eof handling (-profile strict) +} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] # This also configures the channel encoding profile as strict. fconfigure $f -encoding binary # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later. @@ -9383,10 +9508,11 @@ } -cleanup { close $chan unset res } -match glob -result {1 {error reading "*":\ invalid or incomplete multibyte or wide character}} + test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] fconfigure $f -encoding iso8859-1 -profile strict @@ -9399,11 +9525,46 @@ close $f removeFile io-75.9 } -match glob -result [list {A} {error writing "*":\ invalid or incomplete multibyte or wide character}] -test io-75.10 { +apply [list {} { + set template { + test {io-75.10 ${mode}} { + incomplete multibyte encoding read is an error + } -setup { + set res {} + set fn [makeFile {} io-75.10] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\xC0 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none {*}${option} + } -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + close $f + binary scan $d H* hd + lappend res $hd + lappend res $status + lappend res $cres + return $res + } -cleanup { + removeFile io-75.10 + } -match glob -result {41 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} + } + # the default encoding mode is not currently strict + #foreach mode {default strict} option {{} {-encodingstrict 1}} + foreach mode {{profile strict}} option {{-profile strict}} { + set test [string map [ + list {${mode}} [list $mode] {${option}} [list $option]] $template] + uplevel $test + } +} [namespace current]] +test {io-75.10 {profile tcl8}} { incomplete multibyte encoding read is not ignored because "binary" sets profile to strict } -setup { set res {} set fn [makeFile {} io-75.10] @@ -9453,22 +9614,59 @@ close $f removeFile io-75.11 } -match glob -result {41 1 {error reading "file*":\ invalid or incomplete multibyte or wide character}} -test io-75.12 { - invalid utf-8 encoding read is not ignored because setting the encoding to - "binary" also set the profile to strict + +apply [list {} { + set template { + test {io-75.12 ${mode}} {invalid utf-8 encoding read returns an error} -setup { + set res {} + set fn [makeFile {} io-75.12] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ + -translation lf {*}${option} + } -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + close $f + binary scan $d H* hd + lappend res $hd $status $cres + return $res + } -cleanup { + removeFile io-75.12 + } -match glob -result {41 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} + } + + # the default encoding mod is not currently strict + #foreach mode {default strict} option {{} {-encodingstrict 1}} + foreach mode {{profile strict}} option {{-profile strict}} { + set test [string map [ + list {${mode}} [list $mode] {${option}} [list $option]] $template] + uplevel $test + } +} [namespace current]] + + +test {io-75.12 {profile tcl8}} { + invalid utf-8 encoding read, is not ignored because setting the encoding to + "binary" also sets the profile to strict } -setup { set res {} set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf + fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ + -translation lf } -body { catch {read $f} errmsg lappend res $errmsg chan configure $f -profile tcl8 seek $f 0 @@ -9481,10 +9679,33 @@ removeFile io-75.12 unset res } -match glob -result {{error reading "file*":\ invalid or incomplete multibyte or wide character} 4181} test io-75.13 { + In blocking mode [read] produces an error and leaves the data succesfully + read so far in the return options dictionary. +} -setup { + set fn [makeFile {} io-75.13] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -eofchar "" -translation lf -profile strict +} -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + binary scan $d H* hd + lappend hd $status + lappend hd $cres +} -cleanup { + close $f + removeFile io-75.13 +} -match glob -result {41 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character}} +test io-75.13.nonblocking { In nonblocking mode when there is an encoding error the data that has been successfully read so far is returned first and then the error is returned on the next call to [read]. } -setup { set fn [makeFile {} io-75.13] @@ -9561,10 +9782,65 @@ return $res } -cleanup { close $chan } -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\ 1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI} + +test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup { + set res {} + set fn [makeFile {} io-75.14] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xc0 is invalid in utf-8 + puts -nonewline $f a\nb\xc0\nc\n + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -profile strict +} -body { + lappend res [gets $f] + set status [catch {gets $f} cres copts] + lappend res $status $cres + chan configure $f -profile tcl8 + lappend res [gets $f] + lappend res [gets $f] + close $f + return $res +} -cleanup { + removeFile io-75.14 +} -match glob -result {a 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character} bÀ c} + + +test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { + set res {} + set fn [makeFile {} io-75.15] + set chan [open $fn w+] + fconfigure $chan -encoding binary + # This is not valid UTF-8 + puts $chan hello\nAB\xc0\x40CD\nEFG + close $chan +} -body { + #Now try to read it with [gets] + set chan [open $fn] + fconfigure $chan -encoding utf-8 -profile strict + lappend res [gets $chan] + set status [catch {gets $chan} cres copts] + lappend res $status $cres + set status [catch {gets $chan} cres copts] + lappend res $status $cres + lappend res [dict get $copts -result] + chan configure $chan -encoding binary + foreach char [split [read $chan 2] {}] { + lappend res [format %x [scan $char %c]] + } + return $res +} -cleanup { + close $chan + removeFile io-75.15 +} -match glob -result {hello 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character} 1 {error reading "file*":\ + invalid or incomplete multibyte or wide character} {read AB} 41 42} # ### ### ### ######### ######### ######### Index: tests/ioCmd.test ================================================================== --- tests/ioCmd.test +++ tests/ioCmd.test @@ -1395,11 +1395,11 @@ rename foo {} set res } -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} - proc foo args {oninit cget cgetall; onfinal; track; return ""} + proc foo args {oninit cget cgetall; onfinal; track; return {}} set c [chan create {r w} foo] note [fconfigure $c] close $c rename foo {} set res