Tcl Source Code

Check-in [d481d08ed9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix for [a7a89d422a4f5dd3]: under strict encoding, [gets] returns an error instead of returning the second line. Also, ensure that the position in the file does not change if [gets] returns an error.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | main
Files: files | file ages | folders
SHA3-256: d481d08ed90385db4a1d1e24b42a62930a5cc58f413e06adf3c7f97d01df131a
User & Date: pooryorick 2023-04-15 11:54:27
References
2023-04-15
11:57 Pending ticket [a7a89d422a]: Under strict encoding, gets returns an error instead of returning the second line plus 5 other changes artifact: 5b53efe494 user: pooryorick
Context
2023-04-15
13:25
Extend test io-75.15 to read invalid bytes in binary mode and then use [gets] to retrieve last two ... check-in: 433637e689 user: pooryorick tags: trunk, main
12:15
Merge trunk. check-in: a1bccf546d user: pooryorick tags: tip-653
11:54
Fix for [a7a89d422a4f5dd3], Under strict encoding, [gets] returns an error instead of returning the ... check-in: d481d08ed9 user: pooryorick tags: trunk, main
2023-04-14
21:42
Merge 8.7 check-in: 5b5f4f1482 user: jan.nijtmans tags: trunk, main
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclIO.c.

4612
4613
4614
4615
4616
4617
4618

4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
    Tcl_Size oldLength;
    Tcl_Encoding encoding;
    char *dst, *dstEnd, *eol, *eof;
    Tcl_EncodingState oldState;

    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
	UpdateInterest(chanPtr);

	Tcl_SetErrno(EILSEQ);
	return TCL_INDEX_NONE;
    }

    if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) {
	return TCL_INDEX_NONE;
    }

    /*
     * If we're sitting ready to read the eofchar, there's no need to
     * do it.
     */

    if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) {
	SetFlag(statePtr, CHANNEL_EOF);
	assert(statePtr->inputEncodingFlags & TCL_ENCODING_END);
	assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR));

	/* TODO: Do we need this? */
	UpdateInterest(chanPtr);
	return TCL_INDEX_NONE;
    }

    /*







>
















|







4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
    Tcl_Size oldLength;
    Tcl_Encoding encoding;
    char *dst, *dstEnd, *eol, *eof;
    Tcl_EncodingState oldState;

    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
	UpdateInterest(chanPtr);
	ResetFlag(statePtr, CHANNEL_ENCODING_ERROR);
	Tcl_SetErrno(EILSEQ);
	return TCL_INDEX_NONE;
    }

    if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) {
	return TCL_INDEX_NONE;
    }

    /*
     * If we're sitting ready to read the eofchar, there's no need to
     * do it.
     */

    if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) {
	SetFlag(statePtr, CHANNEL_EOF);
	assert(statePtr->inputEncodingFlags & TCL_ENCODING_END);
assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR));

	/* TODO: Do we need this? */
	UpdateInterest(chanPtr);
	return TCL_INDEX_NONE;
    }

    /*
4994
4995
4996
4997
4998
4999
5000

5001
5002
5003

5004
5005
5006
5007
5008
5009
5010
	TclChannelRelease((Tcl_Channel)chanPtr);
	chanPtr = statePtr->topChanPtr;
	TclChannelPreserve((Tcl_Channel)chanPtr);
    }
    UpdateInterest(chanPtr);
    TclChannelRelease((Tcl_Channel)chanPtr);
    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && gs.bytesWrote == 0) {

	Tcl_SetErrno(EILSEQ);
	copiedTotal = -1;
    }

    return copiedTotal;
}

/*
 *---------------------------------------------------------------------------
 *
 * TclGetsObjBinary --







>



>







4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
	TclChannelRelease((Tcl_Channel)chanPtr);
	chanPtr = statePtr->topChanPtr;
	TclChannelPreserve((Tcl_Channel)chanPtr);
    }
    UpdateInterest(chanPtr);
    TclChannelRelease((Tcl_Channel)chanPtr);
    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && gs.bytesWrote == 0) {
	bufPtr->nextRemoved = oldRemoved;
	Tcl_SetErrno(EILSEQ);
	copiedTotal = -1;
    }
    ResetFlag(statePtr, CHANNEL_ENCODING_ERROR);
    return copiedTotal;
}

/*
 *---------------------------------------------------------------------------
 *
 * TclGetsObjBinary --
5458
5459
5460
5461
5462
5463
5464


5465
5466
5467
5468
5469
5470
5471
    result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
	    statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
	    &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
	    &gsPtr->bytesWrote, &gsPtr->charsWrote);

	if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) {
	    SetFlag(statePtr, CHANNEL_ENCODING_ERROR);


	    result = TCL_OK;
	}

    /*
     * Make sure that if we go through 'gets', that we reset the
     * TCL_ENCODING_START flag still. [Bug #523988]
     */







>
>







5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
    result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
	    statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
	    &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
	    &gsPtr->bytesWrote, &gsPtr->charsWrote);

	if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) {
	    SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
	    ResetFlag(statePtr, CHANNEL_STICKY_EOF);
	    ResetFlag(statePtr, CHANNEL_EOF);
	    result = TCL_OK;
	}

    /*
     * Make sure that if we go through 'gets', that we reset the
     * TCL_ENCODING_START flag still. [Bug #523988]
     */
7585
7586
7587
7588
7589
7590
7591
7592
7593
7594
7595
7596
7597
7598
7599

    for (bytesBuffered = 0, bufPtr = statePtr->inQueueHead; bufPtr != NULL;
	    bufPtr = bufPtr->nextPtr) {
	bytesBuffered += BytesLeft(bufPtr);
    }

    /*
     * Don't forget the bytes in the topmost pushback area.
     */

    for (bufPtr = statePtr->topChanPtr->inQueueHead; bufPtr != NULL;
	    bufPtr = bufPtr->nextPtr) {
	bytesBuffered += BytesLeft(bufPtr);
    }








|







7590
7591
7592
7593
7594
7595
7596
7597
7598
7599
7600
7601
7602
7603
7604

    for (bytesBuffered = 0, bufPtr = statePtr->inQueueHead; bufPtr != NULL;
	    bufPtr = bufPtr->nextPtr) {
	bytesBuffered += BytesLeft(bufPtr);
    }

    /*
     * Remember the bytes in the topmost pushback area.
     */

    for (bufPtr = statePtr->topChanPtr->inQueueHead; bufPtr != NULL;
	    bufPtr = bufPtr->nextPtr) {
	bytesBuffered += BytesLeft(bufPtr);
    }

Changes to tests/io.test.

9262
9263
9264
9265
9266
9267
9268
9269
9270
9271
9272
9273
9274
9275

9276
9277
9278
9279
9280
9281
9282
    removeFile io-75.5
} -result 4181

# io-75.6: with -profile strict, [gets] must fail with an encoding error
# when the channel data contains a byte that is not valid utf-8.
test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    # Write the test bytes with the binary encoding selected.
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    # Rewind, then reread the same bytes as strict utf-8.
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	-translation lf -profile strict
} -body {

    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "file*":\
	invalid or incomplete multibyte or wide character}








|






>







9262
9263
9264
9265
9266
9267
9268
9269
9270
9271
9272
9273
9274
9275
9276
9277
9278
9279
9280
9281
9282
9283
    removeFile io-75.5
} -result 4181

# io-75.6: with -profile strict, [gets] must fail with an encoding error
# when the channel data contains a byte sequence that utf-8 cannot decode.
test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    # Write the test bytes with the binary encoding selected.
    fconfigure $f -encoding binary
    # \x81 is an incomplete byte sequence in utf-8
    puts -nonewline $f A\x81
    flush $f
    # Rewind, then reread the same bytes as strict utf-8.
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	-translation lf -profile strict
} -body {
# NOTE(review): the reason for this 1 ms delay is not evident from this
# view -- confirm whether it is still required.
after 1
    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "file*":\
	invalid or incomplete multibyte or wide character}

9448
9449
9450
9451
9452
9453
9454
9455
9456
9457
9458
9459
9460

9461
9462
9463
9464
9465
9466
9467
9468
9469
9470
9471
9472
9473
9474

9475
9476
9477
9478
9479
9480
9481
9482
9483
9484

9485
9486
9487
9488
9489
9490
9491
9492
9493
9494
9495
9496
9497
9498
9499
9500
9501
9502
9503
9504
9505
	[gets] succesfully returns lines prior to error

	invalid utf-8 encoding [gets] continues in non-strict mode after error
} -setup {
    set chan  [file tempfile]
    fconfigure $f -encoding binary
    # \xc0\n is an invalid utf-8 sequence
    puts -nonewline $f a\nb\xc0\nc\n
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	-translation lf -profile strict
} -body {

    lappend res [gets $f]
    set status [catch {gets $f} cres copts]
    lappend res $status $cres
    chan configure $f -profile tcl8 
    lappend res [gets $f]
    lappend res [gets $f]
    close $f
    return $res
} -match glob -result {a 1 {error reading "*":\
    invalid or incomplete multibyte or wide character} bÀ c}

# io-75.15: after a strict-profile [gets] fails on an invalid utf-8
# sequence, a repeated [gets] fails again, and switching the channel to the
# binary encoding lets [read] return the bytes of the offending line.
test io-75.15 {
    invalid utf-8 encoding strict gets does not hang
    after error reconfigure and read

} -setup {
    set res {}
    set chan [file tempfile]
    fconfigure $chan -encoding binary
    # \xc0\x40 is an invalid utf-8 sequence
    puts $chan hello\nAB\xc0\x40CD\nEFG
	seek $chan 0
} -body {
    # Now try to read it with [gets]
    fconfigure $chan -encoding utf-8 -profile strict

    # The first line holds no invalid bytes and is expected back unchanged.
    lappend res [gets $chan]
    # Both attempts at the second line are expected to report the error.
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres
    # Reread in binary mode; the expected result lists the six bytes of the
    # second line (A B c0 40 C D) as hex codes.
    chan configure $chan -encoding binary
    foreach char [split [read $chan 6] {}] {
	lappend res [format %x [scan $char %c]]
    }
    return $res
} -cleanup {
    close $chan
} -match glob -result {hello 1 {error reading "*": invalid or incomplete multibyte or wide character}\
    1 {error reading "*": invalid or incomplete multibyte or wide character} 41 42 c0 40 43 44}

# ### ### ### ######### ######### #########



test io-76.0 {channel modes} -setup {
    set datafile [makeFile {some characters} dummy]







|





>








|
|


|
|
>





|




>





<
<
<
<



|
|







9449
9450
9451
9452
9453
9454
9455
9456
9457
9458
9459
9460
9461
9462
9463
9464
9465
9466
9467
9468
9469
9470
9471
9472
9473
9474
9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493




9494
9495
9496
9497
9498
9499
9500
9501
9502
9503
9504
9505
	[gets] succesfully returns lines prior to error

	invalid utf-8 encoding [gets] continues in non-strict mode after error
} -setup {
    set chan  [file tempfile]
    fconfigure $f -encoding binary
    # \xc0\n is an invalid utf-8 sequence
    puts -nonewline $f a\nb\nc\xc0\nd\n
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	-translation lf -profile strict
} -body {
    lappend res [gets $f]
    lappend res [gets $f]
    set status [catch {gets $f} cres copts]
    lappend res $status $cres
    chan configure $f -profile tcl8 
    lappend res [gets $f]
    lappend res [gets $f]
    close $f
    return $res
} -match glob -result {a b 1 {error reading "*":\
    invalid or incomplete multibyte or wide character} cÀ d}

# io-75.15: under -profile strict, [gets] returns the two clean lines that
# precede an invalid utf-8 sequence, then reports an encoding error on each
# of the next two calls.
test io-75.15 {
    invalid utf-8 encoding strict
    gets does not hang
    gets succeeds for the first two lines
} -setup {
    set res {}
    set chan [file tempfile]
    fconfigure $chan -encoding binary
    # \xc0\x40 is an invalid utf-8 sequence
    puts $chan hello\nAB\nCD\xc0\x40EF\nGHI
	seek $chan 0
} -body {
    # Now try to read it with [gets]
    fconfigure $chan -encoding utf-8 -profile strict
    # Lines one and two hold no invalid bytes and are expected back unchanged.
    lappend res [gets $chan]
    lappend res [gets $chan]
    # Both attempts at the third line are expected to report the error.
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres




    return $res
} -cleanup {
    close $chan
} -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\
    1 {error reading "*": invalid or incomplete multibyte or wide character}}

# ### ### ### ######### ######### #########



test io-76.0 {channel modes} -setup {
    set datafile [makeFile {some characters} dummy]