Index: doc/read.n ================================================================== --- doc/read.n +++ doc/read.n @@ -59,17 +59,19 @@ An encoding error is reported by the POSIX error code \fBEILSEQ\fR. .PP In blocking mode, the error is directly thrown, even, if there is a leading decodable data portion. The file pointer is advanced just before the encoding error. -An eventual well decoded data chunk before the encoding error is lost. -It is proposed to return this portion within the additional key \fB-data\fR -in the error dictionary. +An eventual well decoded data chunk before the encoding error is returned +in the error option dictionary key \fB-data\fR. +The value of the key contains the empty string, if the error arises at the +first data position. .PP In non blocking mode, first, any data without encoding error is returned (without error state). In the next call, no data is returned and the \fBEILSEQ\fR error state is set. +The key \fB-data\fR is not present. .PP Here is an example with an encoding error in UTF-8 encoding, which is then introspected by a switch to the binary encoding. The test file contains a not continued multi-byte sequence at position 1 (\fBA \\xC3 B\fR): .PP @@ -85,11 +87,11 @@ file35a65a0 % fconfigure $f -encoding utf-8 -profile strict -blocking 1 % catch {read $f} e d 1 % set d --code 1 -level 0 +-data A -code 1 -level 0 -errorstack {INNER {invokeStk1 read file35a65a0}} -errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}} -errorinfo {...} -errorline 1 % tell $f 1 @@ -96,10 +98,15 @@ % fconfigure $f -encoding binary -profile strict % read $f ÃB % close $f .CE +The already decoded data "A" is returned in the error options dictionary key +\fB-data\fR. +The file position is advanced on the encoding error position 1. +The data at the error position is thus recovered by the next \fBread\fR command. +.PP Non blocking example . .CS % set f [open test_A_195_B.txt r] file35a65a0 Index: generic/tclIO.c ================================================================== --- generic/tclIO.c +++ generic/tclIO.c @@ -4932,11 +4932,11 @@ * "&& !GotFlag(statePtr, CHANNEL_NONBLOCKING)" * In case of an encoding error with leading correct bytes, we pass here * two times, as gs.bytesWrote is not 0 on the first pass. This feels * once to much, as the data is anyway not used. */ - + /* Set eol to the position that caused the encoding error, and then * continue to gotEOL, which stores the data that was decoded * without error to objPtr. This allows the caller to do something * useful with the data decoded so far, and also results in the * position of the file being the first byte that was not @@ -7614,10 +7614,37 @@ return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } + +/* + *---------------------------------------------------------------------- + * + * TclChannelGetBlockingMode -- + * + * Returns 1 if the channel is in blocking mode (default), 0 otherwise. + * + * Results: + * 1 or 0, always. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclChannelGetBlockingMode( + Tcl_Channel chan) +{ + ChannelState *statePtr = ((Channel *) chan)->state; + /* State of real channel structure. */ + + return GotFlag(statePtr, CHANNEL_NONBLOCKING) ? 0 : 1; +} + /* *---------------------------------------------------------------------- * * Tcl_InputBlocked -- * Index: generic/tclIOCmd.c ================================================================== --- generic/tclIOCmd.c +++ generic/tclIOCmd.c @@ -457,11 +457,16 @@ TclNewObj(resultPtr); TclChannelPreserve(chan); charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0); if (charactersRead == TCL_IO_FAILURE) { - Tcl_DecrRefCount(resultPtr); + Tcl_Obj *returnOptsPtr = NULL; + if (TclChannelGetBlockingMode(chan)) { + returnOptsPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-data", -1), + resultPtr); + } /* * TIP #219. * Capture error messages put by the driver into the bypass area and * put them into the regular interpreter result. Fall back to the * regular message if nothing was found in the bypass. @@ -471,10 +476,13 @@ Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } TclChannelRelease(chan); + if (returnOptsPtr) { + Tcl_SetReturnOptions(interp, returnOptsPtr); + } return TCL_ERROR; } /* * If requested, remove the last newline in the channel if at EOF. Index: generic/tclInt.h ================================================================== --- generic/tclInt.h +++ generic/tclInt.h @@ -3110,10 +3110,11 @@ Tcl_Size strLen, const unsigned char *pattern, Tcl_Size ptnLen, int flags); MODULE_SCOPE double TclCeil(const void *a); MODULE_SCOPE void TclChannelPreserve(Tcl_Channel chan); MODULE_SCOPE void TclChannelRelease(Tcl_Channel chan); +MODULE_SCOPE int TclChannelGetBlockingMode(Tcl_Channel chan); MODULE_SCOPE int TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr, Var *arrayPtr, Tcl_Obj *name, int index); MODULE_SCOPE int TclCheckBadOctal(Tcl_Interp *interp, const char *value); MODULE_SCOPE int TclCheckEmptyString(Tcl_Obj *objPtr); Index: tests/io.test ================================================================== --- tests/io.test +++ tests/io.test @@ -9300,17 +9300,17 @@ flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ -profile strict } -body { - list [catch {read $f} msg] $msg + list [catch {read $f} msg data] $msg [dict get $data -data] } -cleanup { close $f removeFile io-75.7 - unset msg f fn + unset msg data f fn } -match glob -result {1 {error reading "file*":\ - invalid or incomplete multibyte or wide character}} + invalid or incomplete multibyte or wide character} A} test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9324,14 +9324,15 @@ } -body { set d [read $f] binary scan $d H* hd lappend hd [eof $f] lappend hd [read $f] - close $f set hd } -cleanup { + close $f removeFile io-75.8 + unset f d hd } -result {41 1 {}} test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] @@ -9342,21 +9343,21 @@ flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \ -translation lf -profile strict } -body { - set res [list [catch {read $f} msg] [eof $f]] + set res [list [catch {read $f} msg data] [eof $f] [dict get $data -data]] chan configure $f -encoding iso8859-1 lappend res [read $f 1] chan configure $f -encoding utf-8 - lappend res [catch {read $f 1} msg] $msg + lappend res [catch {read $f 1} msg data] $msg [dict get $data -data] } -cleanup { close $f removeFile io-75.8 - unset res msg fn f -} -match glob -result "1 0 \x81 1 {error reading \"*\":\ - invalid or incomplete multibyte or wide character}" + unset res msg data fn f +} -match glob -result "1 0 A \x81 1 {error reading \"*\":\ + invalid or incomplete multibyte or wide character} {}" test io-strict-multibyte-eof { incomplete utf-8 sequence immediately prior to eof character @@ -9367,16 +9368,16 @@ puts -nonewline $chan \x81\x1A flush $chan seek $chan 0 chan configure $chan -encoding utf-8 -profile strict } -body { - list [catch {read $chan 1} msg] $msg + list [catch {read $chan 1} msg data] $msg [dict get $data -data] } -cleanup { close $chan - unset msg chan + unset msg chan data } -match glob -result {1 {error reading "*":\ - invalid or incomplete multibyte or wide character}} + invalid or incomplete multibyte or wide character} {}} test io-75.9 {unrepresentable character write throws error in strict profile} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] fconfigure $f -encoding iso8859-1 -profile strict @@ -9429,17 +9430,17 @@ fconfigure $f -encoding shiftjis -blocking 0 -eofchar {} -translation lf \ -profile strict } -body { set d [read $f] binary scan $d H* hd - lappend hd [catch {set d [read $f]} msg] $msg + lappend hd [catch {set d [read $f]} msg data] $msg [dict exists $data -data] } -cleanup { close $f removeFile io-75.11 - unset d hd msg f + unset d hd msg data f } -match glob -result {41 1 {error reading "file*":\ - invalid or incomplete multibyte or wide character}} + invalid or incomplete multibyte or wide character} 0} test io-75.12 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary @@ -9470,17 +9471,17 @@ fconfigure $f -encoding utf-8 -blocking 0 -eofchar {} -translation lf \ -profile strict } -body { set d [read $f] binary scan $d H* hd - lappend hd [catch {read $f} msg] $msg + lappend hd [catch {read $f} msg data] $msg [dict exists $data -data] } -cleanup { close $f removeFile io-75.13 - unset d hd msg f fn + unset d hd msg data f fn } -match glob -result {41 1 {error reading "file*":\ - invalid or incomplete multibyte or wide character}} + invalid or incomplete multibyte or wide character} 0} test io-75.14 { [gets] succesfully returns lines prior to error invalid utf-8 encoding [gets] continues in non-strict mode after error @@ -9494,20 +9495,20 @@ fconfigure $chan -encoding utf-8 -buffering none -eofchar {} \ -translation auto -profile strict } -body { set res [gets $chan] lappend res [gets $chan] - lappend res [catch {gets $chan} msg] $msg + lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data] chan configure $chan -profile tcl8 lappend res [gets $chan] lappend res [gets $chan] return $res } -cleanup { close $chan - unset chan res msg + unset chan res msg data } -match glob -result {a b 1 {error reading "*":\ - invalid or incomplete multibyte or wide character} cÀ d} + invalid or incomplete multibyte or wide character} 0 cÀ d} test io-75.15 { invalid utf-8 encoding strict gets does not hang gets succeeds for the first two lines @@ -9521,12 +9522,12 @@ } -body { #Now try to read it with [gets] fconfigure $chan -encoding utf-8 -profile strict lappend res [gets $chan] lappend res [gets $chan] - lappend res [catch {gets $chan} msg] $msg - lappend res [catch {gets $chan} msg] $msg + lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data] + lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data] chan configure $chan -translation binary set data [read $chan 4] foreach char [split $data {}] { scan $char %c ord lappend res [format %x $ord] @@ -9537,11 +9538,11 @@ return $res } -cleanup { close $chan unset chan res msg data } -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\ - 1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI} + 0 1 {error reading "*": invalid or incomplete multibyte or wide character} 0 43 44 c0 40 EF GHI} # ### ### ### ######### ######### #########