Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch tip-653
Excluding Merge-Ins
This is equivalent to a diff from
94257f6465
to b72ee15583
2023-11-18
| | |
23:32 |
|
check-in: 630511c7f7 user: jan.nijtmans tags: core-8-branch
|
2023-11-14
| | |
19:56 |
|
check-in: c20395db07 user: dkf tags: core-8-branch
|
19:04 |
|
Closed-Leaf
check-in: b72ee15583 user: jan.nijtmans tags: tip-653
|
11:07 |
|
check-in: 648e73b3a6 user: jan.nijtmans tags: tip-653
|
2023-11-13
| | |
19:48 |
|
check-in: 2686a93c26 user: dgp tags: mistake
|
17:38 |
|
check-in: 8d4d3146c9 user: oehhar tags: bug-a173f922-fcopy-tell
|
17:36 |
|
check-in: f09428139a user: dgp tags: dgp-zipfs-init
|
16:19 |
|
check-in: 054ebbf73b user: oehhar tags: trunk, main
|
15:35 |
|
check-in: 94257f6465 user: oehhar tags: core-8-branch
|
13:46 |
|
check-in: b1c20cc84e user: jan.nijtmans tags: core-8-branch
|
12:48 |
|
Closed-Leaf
check-in: dbcafe5259 user: oehhar tags: bug-c4eb46a1
|
| | |
Changes to doc/read.n.
︙ | | |
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
-
+
-
-
+
+
+
+
-
+
+
+
+
+
+
|
possible by changing to an encoding (or encoding profile), which accepts
the data.
An encoding error is reported by the POSIX error code \fBEILSEQ\fR.
.PP
In blocking mode, the error is directly thrown, even, if there is a
leading decodable data portion.
The file pointer is advanced just before the encoding error.
An eventual well decoded data chunk before the encoding error is lost.
An eventual well decoded data chunk before the encoding error is returned
It is proposed to return this portion within the additional key \fB-data\fR
in the error dictionary.
in the error option dictionary key \fB-data\fR.
The value of the key contains the empty string, if the error arises at the
first data position.
.PP
In non blocking mode, first, any data without encoding error is returned
(without error state).
In the next call, no data is returned and the \fBEILSEQ\fR error state is set.
The key \fB-data\fR is not present.
.PP
Here is an example with an encoding error in UTF-8 encoding, which is then
introspected by a switch to the binary encoding. The test file contains a not
continued multi-byte sequence at position 1 (\fBA \\xC3 B\fR):
.PP
File creation for examples
.
.CS
% set f [open test_A_195_B.txt wb]; puts -nonewline $f A\\xC3B; close $f
.CE
Blocking example
.
.CS
% set f [open test_A_195_B.txt r]
file35a65a0
% fconfigure $f -encoding utf-8 -profile strict -blocking 1
% catch {read $f} e d
1
% set d
-code 1 -level 0
-data A -code 1 -level 0
-errorstack {INNER {invokeStk1 read file35a65a0}}
-errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}}
-errorinfo {...} -errorline 1
% tell $f
1
% fconfigure $f -encoding binary -profile strict
% read $f
ÃB
% close $f
.CE
The already decoded data "A" is returned in the error options dictionary key
\fB-data\fR.
The file position is advanced on the encoding error position 1.
The data at the error position is thus recovered by the next \fBread\fR command.
.PP
Non blocking example
.
.CS
% set f [open test_A_195_B.txt r]
file35a65a0
% fconfigure $f -encoding utf-8 -profile strict -blocking 0
% read $f
|
︙ | | |
Changes to generic/tclIO.c.
︙ | | |
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
|
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
|
-
+
|
* this while-loop.
* Removed the following from the upper condition:
* "&& !GotFlag(statePtr, CHANNEL_NONBLOCKING)"
* In case of an encoding error with leading correct bytes, we pass here
* two times, as gs.bytesWrote is not 0 on the first pass. This feels
* once to much, as the data is anyway not used.
*/
/* Set eol to the position that caused the encoding error, and then
* continue to gotEOL, which stores the data that was decoded
* without error to objPtr. This allows the caller to do something
* useful with the data decoded so far, and also results in the
* position of the file being the first byte that was not
* successfully decoded, allowing further processing at exactly that
* point, if desired.
|
︙ | | |
7612
7613
7614
7615
7616
7617
7618
7619
7620
7621
7622
7623
7624
7625
|
7612
7613
7614
7615
7616
7617
7618
7619
7620
7621
7622
7623
7624
7625
7626
7627
7628
7629
7630
7631
7632
7633
7634
7635
7636
7637
7638
7639
7640
7641
7642
7643
7644
7645
7646
7647
7648
7649
7650
7651
7652
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
return 0;
}
return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0;
}
/*
*----------------------------------------------------------------------
*
* TclChannelGetBlockingMode --
*
* Returns 1 if the channel is in blocking mode (default), 0 otherwise.
*
* Results:
* 1 or 0, always.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
TclChannelGetBlockingMode(
Tcl_Channel chan)
{
ChannelState *statePtr = ((Channel *) chan)->state;
/* State of real channel structure. */
return GotFlag(statePtr, CHANNEL_NONBLOCKING) ? 0 : 1;
}
/*
*----------------------------------------------------------------------
*
* Tcl_InputBlocked --
*
* Returns 1 if input is blocked on this channel, 0 otherwise.
*
|
︙ | | |
Changes to generic/tclIOCmd.c.
︙ | | |
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
|
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
|
+
+
+
+
-
+
+
+
+
+
|
}
}
TclNewObj(resultPtr);
TclChannelPreserve(chan);
charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0);
if (charactersRead == TCL_IO_FAILURE) {
Tcl_Obj *returnOptsPtr = NULL;
if (TclChannelGetBlockingMode(chan)) {
returnOptsPtr = Tcl_NewDictObj();
Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-data", -1),
Tcl_DecrRefCount(resultPtr);
resultPtr);
}
/*
* TIP #219.
* Capture error messages put by the driver into the bypass area and
* put them into the regular interpreter result. Fall back to the
* regular message if nothing was found in the bypass.
*/
if (!TclChanCaughtErrorBypass(interp, chan)) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"error reading \"%s\": %s",
TclGetString(chanObjPtr), Tcl_PosixError(interp)));
}
TclChannelRelease(chan);
if (returnOptsPtr) {
Tcl_SetReturnOptions(interp, returnOptsPtr);
}
return TCL_ERROR;
}
/*
* If requested, remove the last newline in the channel if at EOF.
*/
|
︙ | | |
Changes to generic/tclInt.h.
︙ | | |
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
|
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
|
+
|
MODULE_SCOPE double TclBignumToDouble(const void *bignum);
MODULE_SCOPE int TclByteArrayMatch(const unsigned char *string,
Tcl_Size strLen, const unsigned char *pattern,
Tcl_Size ptnLen, int flags);
MODULE_SCOPE double TclCeil(const void *a);
MODULE_SCOPE void TclChannelPreserve(Tcl_Channel chan);
MODULE_SCOPE void TclChannelRelease(Tcl_Channel chan);
MODULE_SCOPE int TclChannelGetBlockingMode(Tcl_Channel chan);
MODULE_SCOPE int TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr,
Var *arrayPtr, Tcl_Obj *name, int index);
MODULE_SCOPE int TclCheckBadOctal(Tcl_Interp *interp,
const char *value);
MODULE_SCOPE int TclCheckEmptyString(Tcl_Obj *objPtr);
MODULE_SCOPE int TclChanCaughtErrorBypass(Tcl_Interp *interp,
Tcl_Channel chan);
|
︙ | | |
Changes to tests/io.test.
︙ | | |
9298
9299
9300
9301
9302
9303
9304
9305
9306
9307
9308
9309
9310
9311
9312
9313
9314
9315
9316
9317
9318
9319
9320
9321
9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
9368
9369
9370
9371
9372
9373
9374
9375
9376
9377
9378
9379
9380
9381
9382
9383
9384
|
9298
9299
9300
9301
9302
9303
9304
9305
9306
9307
9308
9309
9310
9311
9312
9313
9314
9315
9316
9317
9318
9319
9320
9321
9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
9368
9369
9370
9371
9372
9373
9374
9375
9376
9377
9378
9379
9380
9381
9382
9383
9384
9385
|
-
+
-
+
-
+
-
+
+
-
+
-
+
-
-
-
+
+
+
-
+
-
+
-
+
|
# \x81 is invalid in utf-8
puts -nonewline $f A\x81
flush $f
seek $f 0
fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \
-profile strict
} -body {
list [catch {read $f} msg] $msg
list [catch {read $f} msg data] $msg [dict get $data -data]
} -cleanup {
close $f
removeFile io-75.7
unset msg f fn
unset msg data f fn
} -match glob -result {1 {error reading "file*":\
invalid or incomplete multibyte or wide character}}
invalid or incomplete multibyte or wide character} A}
test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setup {
set fn [makeFile {} io-75.8]
set f [open $fn w+]
fconfigure $f -encoding binary
# \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes
# precedence.
puts -nonewline $f A\x1A\x81
flush $f
seek $f 0
fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
-translation lf -profile strict
} -body {
set d [read $f]
binary scan $d H* hd
lappend hd [eof $f]
lappend hd [read $f]
close $f
set hd
} -cleanup {
close $f
removeFile io-75.8
unset f d hd
} -result {41 1 {}}
test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile strict)} -setup {
set fn [makeFile {} io-75.8]
set f [open $fn w+]
# This also configures the channel encoding profile as strict.
fconfigure $f -encoding binary
# \x81 is invalid in utf-8. -eofchar is not detected, because it comes later.
puts -nonewline $f A\x81\x81\x1A
flush $f
seek $f 0
fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
-translation lf -profile strict
} -body {
set res [list [catch {read $f} msg] [eof $f]]
set res [list [catch {read $f} msg data] [eof $f] [dict get $data -data]]
chan configure $f -encoding iso8859-1
lappend res [read $f 1]
chan configure $f -encoding utf-8
lappend res [catch {read $f 1} msg] $msg
lappend res [catch {read $f 1} msg data] $msg [dict get $data -data]
} -cleanup {
close $f
removeFile io-75.8
unset res msg fn f
} -match glob -result "1 0 \x81 1 {error reading \"*\":\
invalid or incomplete multibyte or wide character}"
unset res msg data fn f
} -match glob -result "1 0 A \x81 1 {error reading \"*\":\
invalid or incomplete multibyte or wide character} {}"
test io-strict-multibyte-eof {
incomplete utf-8 sequence immediately prior to eof character
See issue 25cdcb7e8fb381fb
} -setup {
set chan [file tempfile];
fconfigure $chan -encoding binary
puts -nonewline $chan \x81\x1A
flush $chan
seek $chan 0
chan configure $chan -encoding utf-8 -profile strict
} -body {
list [catch {read $chan 1} msg] $msg
list [catch {read $chan 1} msg data] $msg [dict get $data -data]
} -cleanup {
close $chan
unset msg chan
unset msg chan data
} -match glob -result {1 {error reading "*":\
invalid or incomplete multibyte or wide character}}
invalid or incomplete multibyte or wide character} {}}
test io-75.9 {unrepresentable character write throws error in strict profile} -setup {
set fn [makeFile {} io-75.9]
set f [open $fn w+]
fconfigure $f -encoding iso8859-1 -profile strict
} -body {
catch {puts -nonewline $f "A\u2022"} msg
|
︙ | | |
9427
9428
9429
9430
9431
9432
9433
9434
9435
9436
9437
9438
9439
9440
9441
9442
9443
9444
9445
9446
9447
|
9428
9429
9430
9431
9432
9433
9434
9435
9436
9437
9438
9439
9440
9441
9442
9443
9444
9445
9446
9447
9448
|
-
+
-
+
-
+
|
flush $f
seek $f 0
fconfigure $f -encoding shiftjis -blocking 0 -eofchar {} -translation lf \
-profile strict
} -body {
set d [read $f]
binary scan $d H* hd
lappend hd [catch {set d [read $f]} msg] $msg
lappend hd [catch {set d [read $f]} msg data] $msg [dict exists $data -data]
} -cleanup {
close $f
removeFile io-75.11
unset d hd msg f
unset d hd msg data f
} -match glob -result {41 1 {error reading "file*":\
invalid or incomplete multibyte or wide character}}
invalid or incomplete multibyte or wide character} 0}
test io-75.12 {invalid utf-8 encoding read is ignored} -setup {
set fn [makeFile {} io-75.12]
set f [open $fn w+]
fconfigure $f -encoding binary
puts -nonewline $f A\x81
flush $f
|
︙ | | |
9468
9469
9470
9471
9472
9473
9474
9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
9494
9495
9496
9497
9498
9499
9500
9501
9502
9503
9504
9505
9506
9507
9508
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518
9519
9520
9521
9522
9523
9524
9525
9526
9527
9528
9529
9530
9531
9532
9533
9534
9535
9536
9537
9538
9539
9540
9541
9542
9543
9544
9545
9546
9547
9548
9549
|
9469
9470
9471
9472
9473
9474
9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
9494
9495
9496
9497
9498
9499
9500
9501
9502
9503
9504
9505
9506
9507
9508
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518
9519
9520
9521
9522
9523
9524
9525
9526
9527
9528
9529
9530
9531
9532
9533
9534
9535
9536
9537
9538
9539
9540
9541
9542
9543
9544
9545
9546
9547
9548
9549
9550
|
-
+
-
+
-
+
-
+
-
+
-
+
-
-
+
+
-
+
|
flush $f
seek $f 0
fconfigure $f -encoding utf-8 -blocking 0 -eofchar {} -translation lf \
-profile strict
} -body {
set d [read $f]
binary scan $d H* hd
lappend hd [catch {read $f} msg] $msg
lappend hd [catch {read $f} msg data] $msg [dict exists $data -data]
} -cleanup {
close $f
removeFile io-75.13
unset d hd msg f fn
unset d hd msg data f fn
} -match glob -result {41 1 {error reading "file*":\
invalid or incomplete multibyte or wide character}}
invalid or incomplete multibyte or wide character} 0}
test io-75.14 {
[gets] succesfully returns lines prior to error
invalid utf-8 encoding [gets] continues in non-strict mode after error
} -setup {
set chan [file tempfile]
fconfigure $chan -encoding binary
# \xC0\n is an invalid utf-8 sequence
puts -nonewline $chan a\nb\nc\xC0\nd\n
flush $chan
seek $chan 0
fconfigure $chan -encoding utf-8 -buffering none -eofchar {} \
-translation auto -profile strict
} -body {
set res [gets $chan]
lappend res [gets $chan]
lappend res [catch {gets $chan} msg] $msg
lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
chan configure $chan -profile tcl8
lappend res [gets $chan]
lappend res [gets $chan]
return $res
} -cleanup {
close $chan
unset chan res msg
unset chan res msg data
} -match glob -result {a b 1 {error reading "*":\
invalid or incomplete multibyte or wide character} cÀ d}
invalid or incomplete multibyte or wide character} 0 cÀ d}
test io-75.15 {
invalid utf-8 encoding strict
gets does not hang
gets succeeds for the first two lines
} -setup {
set res {}
set chan [file tempfile]
fconfigure $chan -encoding binary
# \xC0\x40 is an invalid utf-8 sequence
puts $chan hello\nAB\nCD\xC0\x40EF\nGHI
seek $chan 0
} -body {
#Now try to read it with [gets]
fconfigure $chan -encoding utf-8 -profile strict
lappend res [gets $chan]
lappend res [gets $chan]
lappend res [catch {gets $chan} msg] $msg
lappend res [catch {gets $chan} msg] $msg
lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
lappend res [catch {gets $chan} msg data] $msg [dict exists $data -data]
chan configure $chan -translation binary
set data [read $chan 4]
foreach char [split $data {}] {
scan $char %c ord
lappend res [format %x $ord]
}
fconfigure $chan -encoding utf-8 -profile strict -translation auto
lappend res [gets $chan]
lappend res [gets $chan]
return $res
} -cleanup {
close $chan
unset chan res msg data
} -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\
1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI}
0 1 {error reading "*": invalid or incomplete multibyte or wide character} 0 43 44 c0 40 EF GHI}
# ### ### ### ######### ######### #########
test io-76.0 {channel modes} -setup {
set datafile [makeFile {some characters} dummy]
|
︙ | | |