Tcl Source Code

Check-in [a02c5b9f8b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix for [25cdcb7e8fb381fb]: Incomplete utf-8 sequence followed by eofchar results in failed assertion.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | main
Files: files | file ages | folders
SHA3-256: a02c5b9f8b6877aaca48efe8dbfab0ab802e318c14b185d0c17ac32598b046ec
User & Date: pooryorick 2023-04-18 22:00:10
References
2023-04-18
22:01 Pending ticket [25cdcb7e8f]: incomplete utf-8 sequence followed by eofchar results in failed assertion plus 4 other changes artifact: 4bbe3b2bfc user: pooryorick
Context
2023-04-20
18:13
sync with trunk. Fix lseq size bugs. check-in: a48444ee91 user: griffin tags: bug-fa00fbbbabe
2023-04-18
22:35
In DoReadChars() reset CHANNEL_ENCODING_ERROR instead of CHANNEL_BLOCKED. check-in: 883464ea32 user: pooryorick tags: trunk, main
22:00
Fix for [25cdcb7e8fb381fb]: Incomplete utf-8 sequence followed by eofchar results in failed asserti... check-in: a02c5b9f8b user: pooryorick tags: trunk, main
21:16
Eliminate unnecessary clearance of CHANNEL_STICKY_EOF flag. check-in: af256f4469 user: pooryorick tags: trunk, main
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclIO.c.

6306
6307
6308
6309
6310
6311
6312
6313





6314
6315
6316
6317
6318
6319
6320
	assert(bufPtr->nextPtr == NULL || BytesLeft(bufPtr->nextPtr) == 0
		|| (statePtr->inputEncodingFlags & TCL_ENCODING_END) == 0);

	code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		flags, &statePtr->inputEncodingState,
		dst, dstLimit, &srcRead, &dstDecoded, &numChars);

	if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX) {





	    SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
	    code = TCL_OK;
	}

	/*
	 * Perform the translation transformation in place.  Read no more than
	 * the dstDecoded bytes the encoding transformation actually produced.







|
>
>
>
>
>







6306
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318
6319
6320
6321
6322
6323
6324
6325
	assert(bufPtr->nextPtr == NULL || BytesLeft(bufPtr->nextPtr) == 0
		|| (statePtr->inputEncodingFlags & TCL_ENCODING_END) == 0);

	code = Tcl_ExternalToUtf(NULL, encoding, src, srcLen,
		flags, &statePtr->inputEncodingState,
		dst, dstLimit, &srcRead, &dstDecoded, &numChars);

	if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX
	    || (
		code == TCL_CONVERT_MULTIBYTE
		&& GotFlag(statePtr, CHANNEL_EOF
	    ))
	) {
	    SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
	    code = TCL_OK;
	}

	/*
	 * Perform the translation transformation in place.  Read no more than
	 * the dstDecoded bytes the encoding transformation actually produced.

Changes to tests/io.test.

9322
9323
9324
9325
9326
9327
9328

9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342



9343
9344
9345
9346
9347























9348
9349
9350
9351
9352
9353
9354
    removeFile io-75.8
} -result {41 1 {}}

# Strict-profile utf-8 read of the bytes A, \x81, \x1A, where \x1A is the
# configured -eofchar.  Per -result the read is expected to fail (status 1)
# without eof being reported (0), and the \x81 byte is expected to be
# readable once the channel encoding is switched to iso8859-1.
test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup {
	set res {}
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]

    # Write the raw test bytes using the binary encoding.
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later.
    puts -nonewline $f A\x81\x1A
    flush $f
    # Rewind so that the body reads back what was just written.
    seek $f 0
    # Reconfigure for reading: strict utf-8, \x1A as the eof character.
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {
    # One millisecond delay; its purpose is not evident from this test.
    after 1
    # First element of the result: the catch status of the strict read.
    set status [catch {read $f} cres copts]
    lappend res $status
    # Second element: whether the channel reports eof after the failed read.
    lappend res [eof $f]
    # Third element: what can be read after switching to iso8859-1.
    chan configure $f -encoding iso8859-1
    lappend res [read $f]



    close $f
    set res
} -cleanup {
    removeFile io-75.8
} -result "1 0 \x81"
























test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup {
    set fn [makeFile {} io-75.9]
    set f [open $fn w+]
    fconfigure $f -encoding iso8859-1 -profile strict
} -body {
    catch {puts -nonewline $f "A\u2022"} msg







>


|





<




|
>
>
>




|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337

9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
9368
9369
9370
9371
9372
9373
9374
9375
9376
9377
9378
9379
9380
    removeFile io-75.8
} -result {41 1 {}}

# Strict-profile utf-8 read of the bytes A, \x81, \x81, \x1A, where \x1A is
# the configured -eofchar.  Per -result the first read is expected to fail
# (status 1) without eof being reported (0), one \x81 is then readable as
# iso8859-1, and a further utf-8 read is expected to fail with the
# invalid-or-incomplete-multibyte error message.
test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup {
	set res {}
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    # This also configures the channel encoding profile as strict.
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later.
    puts -nonewline $f A\x81\x81\x1A
    flush $f
    # Rewind so that the body reads back what was just written.
    seek $f 0
    # Reconfigure for reading: strict utf-8, \x1A as the eof character.
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {

    # First element of the result: the catch status of the strict read.
    set status [catch {read $f} cres copts]
    lappend res $status
    # Second element: whether the channel reports eof after the failed read.
    lappend res [eof $f]
    # Third element: a single character read after switching to iso8859-1.
    chan configure $f -encoding iso8859-1
    lappend res [read $f 1]
    # Fourth element: the error message from reading again as utf-8.
    # The catch status itself is discarded; only the message is recorded.
    chan configure $f -encoding utf-8
    catch {read $f 1} cres
    lappend res $cres
    close $f
    set res
} -cleanup {
    removeFile io-75.8
} -match glob -result "1 0 \x81 {error reading \"*\":\
    invalid or incomplete multibyte or wide character}"


# Regression test tied to the issue named in the description below: a
# strict-profile utf-8 read of the bytes \x81, \x1A is expected to raise the
# invalid-or-incomplete-multibyte error rather than return data.
test io-strict-multibyte-eof {
    incomplete utf-8 sequence immediately prior to eof character

    See issue 25cdcb7e8fb381fb
} -setup {
    set res {}
    # Temporary file opened as a channel; no file name is captured.
    set chan [file tempfile];
    # Write the raw test bytes using the binary encoding.
    fconfigure $chan -encoding binary
    puts -nonewline $chan \x81\x1A
    flush $chan
    # Rewind so that the body reads back what was just written.
    seek $chan 0
    # NOTE(review): no -eofchar is set here although the description refers
    # to an eof character; presumably this relies on the channel default.
    # Confirm that \x1A is actually treated as the eof character.
    chan configure $chan -encoding utf-8 -profile strict
} -body {
    # The body result is the list of catch status and error message.
    set status [catch {read $chan 1} cres]
    lappend res $status $cres
} -cleanup {
    close $chan
    unset res
} -match glob -result {1 {error reading "*":\
    invalid or incomplete multibyte or wide character}}

test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup {
    set fn [makeFile {} io-75.9]
    set f [open $fn w+]
    fconfigure $f -encoding iso8859-1 -profile strict
} -body {
    catch {puts -nonewline $f "A\u2022"} msg