Tcl Source Code

Check-in [18437bdc82]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix [1bedc53c8c]: synchronous [read] with -strictencoding does not produce an error on invalid input
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core-8-branch
Files: files | file ages | folders
SHA3-256: 18437bdc82d0152c8257b240d006967b7d9d87798fbbc9f9c250bd401395fa27
User & Date: jan.nijtmans 2023-03-21 16:28:57
References
2023-03-24
21:59 Closed ticket [6978c01b65]: Channel encoding difference 8.6 <-> 9.0 plus 7 other changes artifact: 3b7246038f user: jan.nijtmans
2023-03-21
17:51 Closed ticket [1bedc53c8c]: synchronous read with -strictencoding does not produce an error on invalid input plus 7 other changes artifact: 3a53c0d261 user: jan.nijtmans
Context
2023-03-21
23:49
Remove unneeded Tcl_IncrRefCount and TclDecrRefCount. TclPtrSetVarIdx takes ownership of newValuePtr... check-in: c0778ccb77 user: pooryorick tags: core-8-branch
16:54
Merge 8.7 check-in: 78beedf7e2 user: jan.nijtmans tags: trunk, main
16:28
Fix [1bedc53c8c]: synchronous [read] with -strictencoding does not produce an error on invalid input check-in: 18437bdc82 user: jan.nijtmans tags: core-8-branch
16:26
Fix indenting. More use of TCL_INDEX_NONE check-in: 4e6e2301fa user: jan.nijtmans tags: core-8-branch
11:11
Some test-cases, which test for partial read without throwing EILSEQ immediately, only work with ""-... Closed-Leaf check-in: ac6ce395d7 user: jan.nijtmans tags: bug-1bedc53c8c
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclIO.c.

6074
6075
6076
6077
6078
6079
6080

















6081
6082
6083
6084
6085
6086
6087

		RecycleBuffer(statePtr, bufPtr, 0);
		statePtr->inQueueHead = nextPtr;
		if (nextPtr == NULL) {
		    statePtr->inQueueTail = NULL;
		}
	    }

















	}

	if (copiedNow < 0) {
	    if (GotFlag(statePtr, CHANNEL_EOF)) {
		break;
	    }
	    if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_BLOCKED)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104

		RecycleBuffer(statePtr, bufPtr, 0);
		statePtr->inQueueHead = nextPtr;
		if (nextPtr == NULL) {
		    statePtr->inQueueTail = NULL;
		}
	    }

	    /*
	     * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set,
	     * then CHANNEL_ENCODING_ERROR was caused by data that occurred
	     * after the EOF character was encountered, so it doesn't count as
	     * a real error.
	     */

	    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
		    && !GotFlag(statePtr, CHANNEL_STICKY_EOF)
		    && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) {
		/* Channel is blocking.  Return an error so that callers
		 * like [read] can return an error.
		*/
		Tcl_SetErrno(EILSEQ);
		goto finish;
	    }
	}

	if (copiedNow < 0) {
	    if (GotFlag(statePtr, CHANNEL_EOF)) {
		break;
	    }
	    if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_BLOCKED)
6102
6103
6104
6105
6106
6107
6108

6109
6110
6111
6112
6113
6114
6115
	    }
	} else {
	    copied += copiedNow;
	    toRead -= copiedNow;
	}
    }


    /*
     * Failure to fill a channel buffer may have left channel reporting a
     * "blocked" state, but so long as we fulfilled the request here, the
     * caller does not consider us blocked.
     */

    if (toRead == 0) {







>







6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
	    }
	} else {
	    copied += copiedNow;
	    toRead -= copiedNow;
	}
    }

finish:
    /*
     * Failure to fill a channel buffer may have left channel reporting a
     * "blocked" state, but so long as we fulfilled the request here, the
     * caller does not consider us blocked.
     */

    if (toRead == 0) {
6135
6136
6137
6138
6139
6140
6141





6142
6143
6144
6145
6146
6147
6148
    assert(!GotFlag(statePtr, CHANNEL_EOF)
	    || GotFlag(statePtr, CHANNEL_STICKY_EOF)
	    || GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
	    || Tcl_InputBuffered((Tcl_Channel)chanPtr) == 0);
    assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED)
            == (CHANNEL_EOF|CHANNEL_BLOCKED)));
    UpdateInterest(chanPtr);





    TclChannelRelease((Tcl_Channel)chanPtr);
    return copied;
}

/*
 *---------------------------------------------------------------------------
 *







>
>
>
>
>







6153
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
    assert(!GotFlag(statePtr, CHANNEL_EOF)
	    || GotFlag(statePtr, CHANNEL_STICKY_EOF)
	    || GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
	    || Tcl_InputBuffered((Tcl_Channel)chanPtr) == 0);
    assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED)
            == (CHANNEL_EOF|CHANNEL_BLOCKED)));
    UpdateInterest(chanPtr);
    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
	    && (!copied || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) {
	Tcl_SetErrno(EILSEQ);
	copied = -1;
    }
    TclChannelRelease((Tcl_Channel)chanPtr);
    return copied;
}

/*
 *---------------------------------------------------------------------------
 *
6765
6766
6767
6768
6769
6770
6771



6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
    *srcLenPtr = srcLen;

    if (srcStart + srcLen == eof) {
	/*
	 * EOF character was seen in EOL translated range. Leave current file
	 * position pointing at the EOF character, but don't store the EOF
	 * character in the output string.



	 */

	SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF);
	statePtr->inputEncodingFlags |= TCL_ENCODING_END;
	ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR);
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_Ungets --







>
>
>




|







6788
6789
6790
6791
6792
6793
6794
6795
6796
6797
6798
6799
6800
6801
6802
6803
6804
6805
6806
6807
6808
6809
    *srcLenPtr = srcLen;

    if (srcStart + srcLen == eof) {
	/*
	 * EOF character was seen in EOL translated range. Leave current file
	 * position pointing at the EOF character, but don't store the EOF
	 * character in the output string.
	 *
	 * If CHANNEL_ENCODING_ERROR is set, it can only be because of data
	 * encountered after the EOF character, so it is nonsense.  Unset it.
	 */

	SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF);
	statePtr->inputEncodingFlags |= TCL_ENCODING_END;
	ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR);
    }
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_Ungets --

Changes to tests/io.test.

9106
9107
9108
9109
9110
9111
9112
9113
9114
9115
9116

9117
9118
9119
9120
9121
9122
9123
    fconfigure $f -encoding binary
    puts -nonewline $f "A\xC0"
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -profile tcl8
} -body {
    set d [read $f]
    close $f
    binary scan $d H* hd
    set hd
} -cleanup {

    removeFile io-75.3
} -result 41c0

# As utf-8 has a special treatment in multi-byte decoding, also test another
# one.
test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup {
    set fn [makeFile {} io-75.4]







<



>







9106
9107
9108
9109
9110
9111
9112

9113
9114
9115
9116
9117
9118
9119
9120
9121
9122
9123
    fconfigure $f -encoding binary
    puts -nonewline $f "A\xC0"
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -profile tcl8
} -body {
    set d [read $f]

    binary scan $d H* hd
    set hd
} -cleanup {
    close $f
    removeFile io-75.3
} -result 41c0

# As utf-8 has a special treatment in multi-byte decoding, also test another
# one.
test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup {
    set fn [makeFile {} io-75.4]
9144
9145
9146
9147
9148
9149
9150
9151
9152
9153
9154

9155
9156
9157
9158
9159
9160
9161
9162
9163
9164
9165
9166
9167
9168
9169
9170
9171
9172
















9173
9174
9175
9176
9177
9178
9179
    fconfigure $f -encoding binary
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8
} -body {
    set d [read $f]
    close $f
    binary scan $d H* hd
    set hd
} -cleanup {

    removeFile io-75.5
} -result 4181

test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict
} -body {
    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence}

















test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup {
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes precedence.
    puts -nonewline $f A\x1A\x81







<



>


















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9144
9145
9146
9147
9148
9149
9150

9151
9152
9153
9154
9155
9156
9157
9158
9159
9160
9161
9162
9163
9164
9165
9166
9167
9168
9169
9170
9171
9172
9173
9174
9175
9176
9177
9178
9179
9180
9181
9182
9183
9184
9185
9186
9187
9188
9189
9190
9191
9192
9193
9194
9195
    fconfigure $f -encoding binary
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8
} -body {
    set d [read $f]

    binary scan $d H* hd
    set hd
} -cleanup {
    close $f
    removeFile io-75.5
} -result 4181

test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict
} -body {
    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence}

test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.7]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict
} -body {
    read $f
} -cleanup {
    close $f
    removeFile io-75.7
} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence}

test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup {
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes precedence.
    puts -nonewline $f A\x1A\x81
9214
9215
9216
9217
9218
9219
9220
9221
9222
9223
9224

9225
9226
9227
9228
9229
9230
9231
9232
9233
9234
9235
9236
9237
9238
9239
9240
9241
9242
9243
9244
9245
9246
9247
9248
    fconfigure $f -encoding binary
    puts -nonewline $f A\xC0
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none
} -body {
    set d [read $f]
    close $f
    binary scan $d H* hd
    set hd
} -cleanup {

    removeFile io-75.10
} -result 41c0
# The current result returns the orphan byte as byte.
# This may be expected due to special utf-8 handling.

# As utf-8 has a special treatment in multi-byte decoding, also test another
# one.
test io-75.11 {shiftjis encoding error read results in raw bytes} -setup {
    set fn [makeFile {} io-75.11]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # In shiftjis, \x81 starts a two-byte sequence.
    # But 2nd byte \xFF is not allowed
    puts -nonewline $f A\x81\xFFA
    flush $f
    seek $f 0
    fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [catch {set d [read $f]} msg]
    lappend hd $msg
} -cleanup {
    close $f







<



>
















|







9230
9231
9232
9233
9234
9235
9236

9237
9238
9239
9240
9241
9242
9243
9244
9245
9246
9247
9248
9249
9250
9251
9252
9253
9254
9255
9256
9257
9258
9259
9260
9261
9262
9263
9264
    fconfigure $f -encoding binary
    puts -nonewline $f A\xC0
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none
} -body {
    set d [read $f]

    binary scan $d H* hd
    set hd
} -cleanup {
    close $f
    removeFile io-75.10
} -result 41c0
# The current result returns the orphan byte as byte.
# This may be expected due to special utf-8 handling.

# As utf-8 has a special treatment in multi-byte decoding, also test another
# one.
test io-75.11 {shiftjis encoding error read results in raw bytes} -setup {
    set fn [makeFile {} io-75.11]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # In shiftjis, \x81 starts a two-byte sequence.
    # But 2nd byte \xFF is not allowed
    puts -nonewline $f A\x81\xFFA
    flush $f
    seek $f 0
    fconfigure $f -encoding shiftjis -blocking 0 -eofchar "" -translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [catch {set d [read $f]} msg]
    lappend hd $msg
} -cleanup {
    close $f
9269
9270
9271
9272
9273
9274
9275
9276
9277
9278
9279
9280
9281
9282
9283
    set fn [makeFile {} io-75.13]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f "A\x81"
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [catch {read $f} msg]
    lappend hd $msg
} -cleanup {
    close $f







|







9285
9286
9287
9288
9289
9290
9291
9292
9293
9294
9295
9296
9297
9298
9299
    set fn [makeFile {} io-75.13]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f "A\x81"
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -blocking 0 -eofchar "" -translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [catch {read $f} msg]
    lappend hd $msg
} -cleanup {
    close $f