Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch py-b8f575aa23 Excluding Merge-Ins
This is equivalent to a diff from 41c19b8112 to 080b9a1c81
2023-02-02
| ||
22:51 | Fix for [b8f575aa2398b0e4] and [154ed7ce564a7b4c], double-[read]/[gets] problem. Partial-read funct... check-in: 11c7f071cb user: pooryorick tags: py-b8f575aa23-nopartial | |
2023-01-17
| ||
15:34 | Fix [read] error behaviour under issue [b8f575aa2398b0e4] by always returning returning successfully... check-in: 1a225bd50e user: pooryorick tags: py-b8f575aa23-errorinfo | |
2023-01-08
| ||
10:07 | For [read] and [gets] encoding errors, use "-result read" in return options dictionary instead of j... Closed-Leaf check-in: 080b9a1c81 user: pooryorick tags: py-b8f575aa23 | |
00:01 | For [read] and [gets] encoding errors, used "-result read" in return options dictionary instead of j... check-in: 64ab3097cc user: pooryorick tags: trunk-encodingdefaultstrict | |
2023-01-02
| ||
23:12 | Merge py-b8f575aa23: Fix for [154ed7ce56], Tcl 9: [gets] on -strictencoding 1 configured channel. check-in: 8c5d9bcc6e user: pooryorick tags: py-b8f575aa23 | |
2023-01-01
| ||
23:36 | Merge 8.6 check-in: b85d5ef2c8 user: jan.nijtmans tags: core-8-branch | |
2022-12-28
| ||
22:58 |
merge py-b8f575aa23 after updating fix so that all tests pass.
jn: let's wait for more review, si... check-in: 9b6c29fa98 user: pooryorick tags: py-b8f575aa23 | |
17:01 | merge 8.7 check-in: 81e8cdd1f0 user: dgp tags: core-8-7-b1-rc | |
12:07 | A better fix for [b8f575aa23], as it maintains the expectation that synchronous [read] results in a... check-in: 00995080d6 user: pooryorick tags: py-b8f575aa23 | |
2022-12-21
| ||
21:43 | Merge 8.7 check-in: fb6e7180d0 user: jan.nijtmans tags: trunk, main | |
21:07 | Make two more functions static check-in: 41c19b8112 user: jan.nijtmans tags: core-8-branch | |
20:43 | Add 'interp' argument to some arith functions, for better error-handling check-in: ccd603d584 user: jan.nijtmans tags: core-8-branch | |
Changes to generic/tclEncoding.c.
︙ | ︙ | |||
2382 2383 2384 2385 2386 2387 2388 | /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. */ *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) | > > > | > > | 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 | /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. */ *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && (UCHAR(src[1]) == 0x80) && ( !(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) )) { /* * If in input mode, and -strict is specified: This is an error. */ if (flags & TCL_ENCODING_MODIFIED) { result = TCL_CONVERT_SYNTAX; break; } |
︙ | ︙ |
Changes to generic/tclIO.c.
︙ | ︙ | |||
4652 4653 4654 4655 4656 4657 4658 | { GetsState gs; Channel *chanPtr = (Channel *) chan; ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; | > | > | 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 | { GetsState gs; Channel *chanPtr = (Channel *) chan; ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; int reportError = 0; size_t oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) { return TCL_INDEX_NONE; } |
︙ | ︙ | |||
4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 | Tcl_SetObjLength(objPtr, oldLength); CommonGetsCleanup(chanPtr); copiedTotal = -1; ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); goto done; } goto gotEOL; } dst = dstEnd; } /* * Found EOL or EOF, but the output buffer may now contain too many UTF-8 * characters. We need to know how many raw bytes correspond to the number | > > > > > > > > > > > > > | 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 | Tcl_SetObjLength(objPtr, oldLength); CommonGetsCleanup(chanPtr); copiedTotal = -1; ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); goto done; } goto gotEOL; } else if (gs.bytesWrote == 0 && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { /* Set eol to the position that caused the encoding error, and then * coninue to gotEOL, which stores the data that was decoded * without error to objPtr. This allows the caller to do something * useful with the data decoded so far, and also results in the * position of the file being the first byte that was not * succesfully decoded, allowing further processing at exactly that * point, if desired. */ eol = dstEnd; reportError = 1; goto gotEOL; } dst = dstEnd; } /* * Found EOL or EOF, but the output buffer may now contain too many UTF-8 * characters. We need to know how many raw bytes correspond to the number |
︙ | ︙ | |||
4977 4978 4979 4980 4981 4982 4983 | /* * Recycle all the emptied buffers. */ Tcl_SetObjLength(objPtr, eol - objPtr->bytes); CommonGetsCleanup(chanPtr); ResetFlag(statePtr, CHANNEL_BLOCKED); | > > > > > > > > | > | 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 | /* * Recycle all the emptied buffers. */ Tcl_SetObjLength(objPtr, eol - objPtr->bytes); CommonGetsCleanup(chanPtr); ResetFlag(statePtr, CHANNEL_BLOCKED); if (reportError) { ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); /* reset CHANNEL_ENCODING_ERROR to afford a chance to reconfigure * the channel and try again */ Tcl_SetErrno(EILSEQ); copiedTotal = -1; } else { copiedTotal = gs.totalChars + gs.charsWrote - skip; } goto done; /* * Couldn't get a complete line. This only happens if we get a error * reading from the channel or we are non-blocking and there wasn't an EOL * or EOF in the data available. */ |
︙ | ︙ | |||
6020 6021 6022 6023 6024 6025 6026 | */ TclGetString(objPtr); } } if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { | | > | | | 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 | */ TclGetString(objPtr); } } if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); return -1; } /* * Early out when next read will see eofchar. * * NOTE: See DoRead for argument that it's a bug (one we're keeping) to * have this escape before the one for zero-char read request. */ if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) { SetFlag(statePtr, CHANNEL_EOF); assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } /* * Special handling for zero-char read request. */ if (toRead == 0) { if (GotFlag(statePtr, CHANNEL_EOF)) { statePtr->inputEncodingFlags |= TCL_ENCODING_START; } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } /* * This operation should occur at the top of a channel stack. */ |
︙ | ︙ | |||
6082 6083 6084 6085 6086 6087 6088 | if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead); } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &factor); } /* | | > > > > > > > > > > > > > > > > > > | 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 | if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead); } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &factor); } /* * Recycle current buffer if empty. */ bufPtr = statePtr->inQueueHead; if (IsBufferEmpty(bufPtr)) { ChannelBuffer *nextPtr = bufPtr->nextPtr; RecycleBuffer(statePtr, bufPtr, 0); statePtr->inQueueHead = nextPtr; if (nextPtr == NULL) { statePtr->inQueueTail = NULL; } } /* * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, * then CHANNEL_ENCODING_ERROR was caused by data that occurred * after the EOF character was encountered, so it doesn't count as * a real error. */ if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_STICKY_EOF) && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { /* Channel is synchronous. Return an error so that callers * like [read] can return an error. */ Tcl_SetErrno(EILSEQ); copied = -1; goto finish; } } if (copiedNow < 0) { if (GotFlag(statePtr, CHANNEL_EOF)) { break; } if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_BLOCKED) |
︙ | ︙ | |||
6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 | } } else { copied += copiedNow; toRead -= copiedNow; } } /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the * caller does not consider us blocked. */ if (toRead == 0) { | > | 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 | } } else { copied += copiedNow; toRead -= copiedNow; } } finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the * caller does not consider us blocked. */ if (toRead == 0) { |
︙ | ︙ | |||
6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 | *srcLenPtr = srcLen; if (srcStart + srcLen == eof) { /* * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; | > > > | | 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 | *srcLenPtr = srcLen; if (srcStart + srcLen == eof) { /* * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. * * If CHANNEL_ENCODING_ERROR is set, it can only be because of data * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); } } /* *---------------------------------------------------------------------- * * Tcl_Ungets -- |
︙ | ︙ |
Changes to generic/tclIOCmd.c.
︙ | ︙ | |||
291 292 293 294 295 296 297 | Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Channel chan; /* The channel to read from. */ int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ | | | 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 | Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Channel chan; /* The channel to read from. */ int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *linePtr, *chanObjPtr, *resultDictPtr, *returnOptsPtr; int code = TCL_OK; if ((objc != 2) && (objc != 3)) { Tcl_WrongNumArgs(interp, 1, objv, "channelId ?varName?"); return TCL_ERROR; } chanObjPtr = objv[1]; |
︙ | ︙ | |||
314 315 316 317 318 319 320 | } TclChannelPreserve(chan); TclNewObj(linePtr); lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen < 0) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { | < > > > > > > > | 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | } TclChannelPreserve(chan); TclNewObj(linePtr); lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen < 0) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { /* * TIP #219. * Capture error messages put by the driver into the bypass area * and put them into the regular interpreter result. Fall back to * the regular message if nothing was found in the bypass. */ if (!TclChanCaughtErrorBypass(interp, chan)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } resultDictPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) , linePtr); returnOptsPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) , resultDictPtr); code = TCL_ERROR; Tcl_SetReturnOptions(interp, returnOptsPtr); goto done; } lineLen = TCL_INDEX_NONE; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, TCL_LEAVE_ERR_MSG) == NULL) { |
︙ | ︙ | |||
377 378 379 380 381 382 383 | Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Channel chan; /* The channel to read from. */ int newline, i; /* Discard newline at end? */ int toRead; /* How many bytes to read? */ int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ | | | 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | Tcl_Obj *const objv[]) /* Argument objects. */ { Tcl_Channel chan; /* The channel to read from. */ int newline, i; /* Discard newline at end? */ int toRead; /* How many bytes to read? */ int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *resultPtr, *resultDictPtr, *returnOptsPtr, *chanObjPtr; if ((objc != 2) && (objc != 3)) { Interp *iPtr; argerror: iPtr = (Interp *) interp; Tcl_WrongNumArgs(interp, 1, objv, "channelId ?numChars?"); |
︙ | ︙ | |||
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | */ if (!TclChanCaughtErrorBypass(interp, chan)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); return TCL_ERROR; } /* * If requested, remove the last newline in the channel if at EOF. */ | > > > > > > > | 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 | */ if (!TclChanCaughtErrorBypass(interp, chan)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } resultDictPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) , resultPtr); returnOptsPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) , resultDictPtr); TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); Tcl_SetReturnOptions(interp, returnOptsPtr); return TCL_ERROR; } /* * If requested, remove the last newline in the channel if at EOF. */ |
︙ | ︙ |
Changes to tests/io.test.
︙ | ︙ | |||
1543 1544 1545 1546 1547 1548 1549 | close $f set f [open $path(test1)] fconfigure $f -encoding utf-8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c } 160 | > > > > | > | | | | | | > | | > > > | > > | | > > > > | > > > > > > > > > | 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 | close $f set f [open $path(test1)] fconfigure $f -encoding utf-8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c } 160 apply [list {} { set template { test io-12.9.@variant@ {ReadChars: multibyte chars split, default (strict)} -body { set res {} set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] fconfigure $f -encoding utf-8 @strict@ -buffersize 10 set status [catch {read $f} cres copts] set in [dict get $copts -result] lappend res $in lappend res $status $cres set status [catch {read $f} cres copts] set in [dict get $copts -result] lappend res $in lappend res $status $cres set res } -cleanup { catch {close $f} } -match glob -result {{read aaaaaaaaa} 1\ {error reading "*": illegal byte sequence}\ {read {}} 1 {error reading "*": illegal byte sequence}} } # strict encoding may be the default in Tcl 9, but in 8 it is not foreach variant {encodingstrict} strict {{-strictencoding 1}} { set script [string map [ list @variant@ $variant @strict@ $strict] $template] uplevel 1 $script } } [namespace current]] test io-12.10 {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] fconfigure $f -encoding utf-8 -buffersize 11 |
︙ | ︙ | |||
9052 9053 9054 9055 9056 9057 9058 | fconfigure $f -encoding binary # \x81 is invalid in utf-8 puts -nonewline $f A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { | > | < < | > > | | | > | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < | | | > > > > > > > > > > > > > > > > > > > > > > > > | > > | | | > | > > > > > > > > > > > > > > > > > > > > | > > | | | > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 | fconfigure $f -encoding binary # \x81 is invalid in utf-8 puts -nonewline $f A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status $cres } -cleanup { close $f removeFile io-75.6 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. puts -nonewline $f A\xA1\x1A flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] binary scan $d H* hd lappend hd [eof $f] lappend hd $status lappend hd $cres fconfigure $f -encoding iso8859-1 lappend hd [read $f];# We changed encoding, so now we can read the \xA1 close $f set hd } -cleanup { removeFile io-75.7 } -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} test io-75.8.incomplete { incomplete uft-8 char after eof char is not an error (-strictencoding 1) } -setup { set hd {} set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary # \x81 is invalid and also incomplete utf-8 data, but because the eof # character \x1A appears first, it's not an error. puts -nonewline $f A\x1A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set d [read $f] binary scan $d H* hd lappend hd [eof $f] # there should be no error on additional reads lappend hd [read $f] close $f set hd } -cleanup { removeFile io-75.8 } -result {41 1 {}} test io-75.8.invalid {invalid utf-8 after eof char is not an error (-strictencoding 1)} -setup { set res {} set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A # appears first, it's not an error. puts -nonewline $f A\x1a\xc0\x80 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set d [read $f] foreach char [split $d {}] { lappend res [format %x [scan $char %c]] } lappend res [eof $f] # there should be no error on additional reads lappend res [read $f] close $f set res } -cleanup { removeFile io-75.8 } -result {41 1 {}} test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] fconfigure $f -encoding iso8859-1 -strictencoding 1 } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f seek $f 0 list [read $f] $msg } -cleanup { close $f removeFile io-75.9 } -match glob -result [list {A} {error writing "*": illegal byte sequence}] test io-75.10 {incomplete multibyte encoding read is ignored} -setup { set fn [makeFile {} io-75.10] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\xC0 flush $f seek $f 0 fconfigure $f -encoding utf-8 -strictencoding 0 -buffering none } -body { set d [read $f] close $f binary scan $d H* hd set hd } -cleanup { removeFile io-75.10 } -result 41c0 test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { set res {} set fn [makeFile {} io-75.10] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\xC0 flush $f seek $f 0 fconfigure $f -encoding utf-8 -strictencoding 1 -buffering none } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] binary scan $d H* hd lappend res $hd $cres chan configure $f -encoding iso8859-1 set d [read $f] binary scan $d H* hd lappend res $hd close $f return $res } -cleanup { removeFile io-75.10 } -match glob -result {41 {error reading "*": illegal byte sequence} c0} # As utf-8 has a special treatment in multi-byte decoding, also test another # one. test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { set fn [makeFile {} io-75.11] set f [open $fn w+] fconfigure $f -encoding binary # In shiftjis, \x81 starts a two-byte sequence. # But 2nd byte \xFF is not allowed puts -nonewline $f A\x81\xFFA flush $f seek $f 0 fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status lappend hd $cres } -cleanup { close $f removeFile io-75.11 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} test io-75.12 {invalid utf-8 encoding read is an error} -setup { set res {} set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ -strictencoding 1 } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] close $f binary scan $d H* hd lappend res $hd $status $cres return $res } -cleanup { removeFile io-75.12 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ -translation lf -strictencoding 0 } -body { set d [read $f] close $f binary scan $d H* hd set hd } -cleanup { removeFile io-75.12 } -result 4181 test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary # \x81 is invalid in utf-8 puts -nonewline $f "A\x81" flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status close $f lappend hd $cres } -cleanup { removeFile io-75.13 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup { set res {} set fn [makeFile {} io-75.14] set f [open $fn w+] fconfigure $f -encoding binary # \xc0 is invalid in utf-8 puts -nonewline $f a\nb\xc0\nc\n flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -strictencoding 1 } -body { lappend res [gets $f] set status [catch {gets $f} cres copts] lappend res $status $cres chan configure $f -strictencoding 0 lappend res [gets $f] lappend res [gets $f] close $f return $res } -cleanup { removeFile io-75.14 } -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { set res {} set fn [makeFile {} io-75.15] set chan [open $fn w+] fconfigure $chan -encoding binary # This is not valid UTF-8 puts $chan hello\nAB\xc0\x40CD\nEFG close $chan } -body { #Now try to read it with [gets] set chan [open $fn] fconfigure $chan -encoding utf-8 -strictencoding 1 lappend res [gets $chan] set status [catch {gets $chan} cres copts] lappend res $status $cres set status [catch {gets $chan} cres copts] lappend res $status $cres lappend res [dict get $copts -result] chan configur $chan -encoding binary foreach char [split [read $chan 2] {}] { lappend res [format %x [scan $char %c]] } return $res } -cleanup { close $chan removeFile io-75.15 } -match glob -result {hello 1 {error reading "*": illegal byte sequence}\ 1 {error reading "*": illegal byte sequence} {read AB} c0 40} test io-76.0 {channel modes} -setup { set datafile [makeFile {some characters} dummy] set f [open $datafile r] } -constraints testchannel -body { testchannel mode $f |
︙ | ︙ |