Tcl Source Code

Changes On Branch trunk-encodingdefaultstrict
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch trunk-encodingdefaultstrict Excluding Merge-Ins

This is equivalent to a diff from 7dc0ea8ca5 to c499122331

2023-06-18
21:30
Merge trunk-encoding-defaultstrict [c499122331]. Leaf check-in: 22400aa71b user: pooryorick tags: unchained
2023-06-12
10:19
Merge trunk [7dc0ea8ca5]. Leaf check-in: c499122331 user: pooryorick tags: trunk-encodingdefaultstrict
04:10
Revert unwarranted change in Tcl_CreateChannel() from [1776327edd]. check-in: f413252375 user: pooryorick tags: trunk, main
2023-06-11
21:31
Merge trunk [19e22d9bc5]. check-in: 377114dc42 user: pooryorick tags: trunk-encodingdefaultstrict
02:46
Merge trunk check-in: b1f00c4901 user: apnadkarni tags: tip-671
02:34
Tests for invalidly encoded file names and env (TIP 671 motivation) check-in: 7dc0ea8ca5 user: apnadkarni tags: trunk, main
2023-06-04
22:58
Merge 8.7 check-in: 8333622e92 user: pointsman tags: trunk, main

Changes to generic/tcl.h.

1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
 * Reserve top byte for profile values (disjoint, not a mask). In case of
 * changes, ensure ENCODING_PROFILE_* macros in tclInt.h are modified if
 * necessary.
 */
#define TCL_ENCODING_PROFILE_TCL8     0x01000000
#define TCL_ENCODING_PROFILE_STRICT   0x02000000
#define TCL_ENCODING_PROFILE_REPLACE  0x03000000
#if TCL_MAJOR_VERSION < 9
#define TCL_ENCODING_PROFILE_DEFAULT  TCL_ENCODING_PROFILE_TCL8
#else
#define TCL_ENCODING_PROFILE_DEFAULT  TCL_ENCODING_PROFILE_TCL8
#endif

/*
 * The following definitions are the error codes returned by the conversion
 * routines:
 *
 * TCL_OK -			All characters were converted.
 * TCL_CONVERT_NOSPACE -	The output buffer would not have been large







<
|
<
<
<







1971
1972
1973
1974
1975
1976
1977

1978



1979
1980
1981
1982
1983
1984
1985
 * Reserve top byte for profile values (disjoint, not a mask). In case of
 * changes, ensure ENCODING_PROFILE_* macros in tclInt.h are modified if
 * necessary.
 */
#define TCL_ENCODING_PROFILE_TCL8     0x01000000
#define TCL_ENCODING_PROFILE_STRICT   0x02000000
#define TCL_ENCODING_PROFILE_REPLACE  0x03000000

#define TCL_ENCODING_PROFILE_DEFAULT  TCL_ENCODING_PROFILE_STRICT




/*
 * The following definitions are the error codes returned by the conversion
 * routines:
 *
 * TCL_OK -			All characters were converted.
 * TCL_CONVERT_NOSPACE -	The output buffer would not have been large

Changes to generic/tclCmdAH.c.

431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
)
{
    static const char *const options[] = {"-profile", "-failindex", NULL};
    enum convertfromOptions { PROFILE, FAILINDEX } optIndex;
    Tcl_Encoding encoding;
    Tcl_Obj *dataObj;
    Tcl_Obj *failVarObj;
    int profile = TCL_ENCODING_PROFILE_TCL8;

    /*
     * Possible combinations:
     * 1) data						-> objc = 2
     * 2) ?options? encoding data			-> objc >= 3
     * It is intentional that specifying option forces encoding to be
     * specified. Less prone to user error. This should have always been







|







431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
)
{
    static const char *const options[] = {"-profile", "-failindex", NULL};
    enum convertfromOptions { PROFILE, FAILINDEX } optIndex;
    Tcl_Encoding encoding;
    Tcl_Obj *dataObj;
    Tcl_Obj *failVarObj;
    int profile = TCL_ENCODING_PROFILE_STRICT;

    /*
     * Possible combinations:
     * 1) data						-> objc = 2
     * 2) ?options? encoding data			-> objc >= 3
     * It is intentional that specifying option forces encoding to be
     * specified. Less prone to user error. This should have always been

Changes to generic/tclEncoding.c.

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
    const char *src,		/* Source string in specified encoding. */
    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
				 * encoding-specific string length. */
    Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
				 * converted string is stored. */
{
    Tcl_ExternalToUtfDStringEx(
	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
    return Tcl_DStringValue(dstPtr);
}


/*
 *-------------------------------------------------------------------------
 *







|







1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
    const char *src,		/* Source string in specified encoding. */
    Tcl_Size srcLen,		/* Source string length in bytes, or < 0 for
				 * encoding-specific string length. */
    Tcl_DString *dstPtr)	/* Uninitialized or free DString in which the
				 * converted string is stored. */
{
    Tcl_ExternalToUtfDStringEx(
	NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_STRICT, dstPtr, NULL);
    return Tcl_DStringValue(dstPtr);
}


/*
 *-------------------------------------------------------------------------
 *

Changes to generic/tclIO.c.

1660
1661
1662
1663
1664
1665
1666




1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
	strcpy(tmp, chanName);
    } else {
	tmp = (char *)Tcl_Alloc(7);
	tmp[0] = '\0';
    }
    statePtr->channelName = tmp;
    statePtr->flags = mask;




    statePtr->maxPerms = mask; /* Save max privileges for close callback */

    /*
     * Set the channel to system default encoding.
     *
     * Note the strange bit of protection taking place here. If the system
     * encoding name is reported back as "binary", something weird is
     * happening. Tcl provides no "binary" encoding, so someone else has
     * provided one. We ignore it so as not to interfere with the "magic"
     * interpretation that Tcl_Channels give to the "-encoding binary" option.
     */

    name = Tcl_GetEncodingName(NULL);
    statePtr->encoding = Tcl_GetEncoding(NULL, name);
    statePtr->inputEncodingState  = NULL;
    statePtr->inputEncodingFlags  = TCL_ENCODING_START;
    ENCODING_PROFILE_SET(statePtr->inputEncodingFlags,
			     TCL_ENCODING_PROFILE_TCL8);
    statePtr->outputEncodingState = NULL;
    statePtr->outputEncodingFlags = TCL_ENCODING_START;
    ENCODING_PROFILE_SET(statePtr->outputEncodingFlags,
			     TCL_ENCODING_PROFILE_TCL8);

    /*
     * Set the channel up initially in AUTO input translation mode to accept
     * "\n", "\r" and "\r\n". Output translation mode is set to a platform
     * specific default value. The eofChar is set to 0 for both input and
     * output, so that Tcl does not look for an in-file EOF indicator (e.g.,
     * ^Z) and does not append an EOF indicator to files.







>
>
>
>

















|



|







1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
	strcpy(tmp, chanName);
    } else {
	tmp = (char *)Tcl_Alloc(7);
	tmp[0] = '\0';
    }
    statePtr->channelName = tmp;
    statePtr->flags = mask;
	/* uncomment this to make default encoding error handling strict */
	/*
    statePtr->flags |= CHANNEL_ENCODING_STRICT;
	*/
    statePtr->maxPerms = mask; /* Save max privileges for close callback */

    /*
     * Set the channel to system default encoding.
     *
     * Note the strange bit of protection taking place here. If the system
     * encoding name is reported back as "binary", something weird is
     * happening. Tcl provides no "binary" encoding, so someone else has
     * provided one. We ignore it so as not to interfere with the "magic"
     * interpretation that Tcl_Channels give to the "-encoding binary" option.
     */

    name = Tcl_GetEncodingName(NULL);
    statePtr->encoding = Tcl_GetEncoding(NULL, name);
    statePtr->inputEncodingState  = NULL;
    statePtr->inputEncodingFlags  = TCL_ENCODING_START;
    ENCODING_PROFILE_SET(statePtr->inputEncodingFlags,
			     TCL_ENCODING_PROFILE_DEFAULT);
    statePtr->outputEncodingState = NULL;
    statePtr->outputEncodingFlags = TCL_ENCODING_START;
    ENCODING_PROFILE_SET(statePtr->outputEncodingFlags,
			     TCL_ENCODING_PROFILE_DEFAULT);

    /*
     * Set the channel up initially in AUTO input translation mode to accept
     * "\n", "\r" and "\r\n". Output translation mode is set to a platform
     * specific default value. The eofChar is set to 0 for both input and
     * output, so that Tcl does not look for an in-file EOF indicator (e.g.,
     * ^Z) and does not append an EOF indicator to files.
5927
5928
5929
5930
5931
5932
5933

5934
5935
5936
5937
5938
5939
5940
#define UTF_EXPANSION_FACTOR	1024
    int factor = UTF_EXPANSION_FACTOR;

    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
	ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
	/* TODO: UpdateInterest not needed here? */
	UpdateInterest(chanPtr);

	Tcl_SetErrno(EILSEQ);
	return -1;
    }
    /*
     * Early out when next read will see eofchar.
     *
     * NOTE: See DoRead for argument that it's a bug (one we're keeping) to







>







5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
#define UTF_EXPANSION_FACTOR	1024
    int factor = UTF_EXPANSION_FACTOR;

    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
	ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
	/* TODO: UpdateInterest not needed here? */
	UpdateInterest(chanPtr);

	Tcl_SetErrno(EILSEQ);
	return -1;
    }
    /*
     * Early out when next read will see eofchar.
     *
     * NOTE: See DoRead for argument that it's a bug (one we're keeping) to
6027
6028
6029
6030
6031
6032
6033

6034
6035
6036
6037
6038
6039
6040
	     * after the EOF character was encountered, so it doesn't count as
	     * a real error.
	     */

	    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
		    && !GotFlag(statePtr, CHANNEL_STICKY_EOF)
		    && (!GotFlag(statePtr, CHANNEL_NONBLOCKING))) {

		goto finish;
	    }
	}

	if (copiedNow < 0) {
	    if (GotFlag(statePtr, CHANNEL_EOF)) {
		break;







>







6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
	     * after the EOF character was encountered, so it doesn't count as
	     * a real error.
	     */

	    if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)
		    && !GotFlag(statePtr, CHANNEL_STICKY_EOF)
		    && (!GotFlag(statePtr, CHANNEL_NONBLOCKING))) {
		copied = -1;
		goto finish;
	    }
	}

	if (copiedNow < 0) {
	    if (GotFlag(statePtr, CHANNEL_EOF)) {
		break;
6106
6107
6108
6109
6110
6111
6112



6113
6114
6115
6116
6117
6118
6119
	 * like [read] can also return an error.
	*/
	ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
	Tcl_SetErrno(EILSEQ);
	copied = -1;
    }
    TclChannelRelease((Tcl_Channel)chanPtr);



    return copied;
}

/*
 *---------------------------------------------------------------------------
 *
 * ReadBytes --







>
>
>







6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
	 * like [read] can also return an error.
	*/
	ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
	Tcl_SetErrno(EILSEQ);
	copied = -1;
    }
    TclChannelRelease((Tcl_Channel)chanPtr);
    if (copied == TCL_INDEX_NONE) {
	ResetFlag(statePtr, CHANNEL_ENCODING_ERROR|CHANNEL_EOF);
    }
    return copied;
}

/*
 *---------------------------------------------------------------------------
 *
 * ReadBytes --

Changes to generic/tclIOCmd.c.

279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Channel chan;		/* The channel to read from. */
    Tcl_Size lineLen;		/* Length of line just read. */
    int mode;			/* Mode in which channel is opened. */
    Tcl_Obj *linePtr, *chanObjPtr;
    int code = TCL_OK;

    if ((objc != 2) && (objc != 3)) {
	Tcl_WrongNumArgs(interp, 1, objv, "channelId ?varName?");
	return TCL_ERROR;
    }
    chanObjPtr = objv[1];







|







279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Channel chan;		/* The channel to read from. */
    Tcl_Size lineLen;		/* Length of line just read. */
    int mode;			/* Mode in which channel is opened. */
    Tcl_Obj *linePtr, *chanObjPtr, *resultDictPtr, *returnOptsPtr;
    int code = TCL_OK;

    if ((objc != 2) && (objc != 3)) {
	Tcl_WrongNumArgs(interp, 1, objv, "channelId ?varName?");
	return TCL_ERROR;
    }
    chanObjPtr = objv[1];
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322






323

324
325
326
327
328
329
330
    }

    TclChannelPreserve(chan);
    TclNewObj(linePtr);
    lineLen = Tcl_GetsObj(chan, linePtr);
    if (lineLen == TCL_IO_FAILURE) {
	if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) {
	    Tcl_DecrRefCount(linePtr);

	    /*
	     * TIP #219.
	     * Capture error messages put by the driver into the bypass area
	     * and put them into the regular interpreter result. Fall back to
	     * the regular message if nothing was found in the bypass.
	     */

	    if (!TclChanCaughtErrorBypass(interp, chan)) {
		Tcl_SetObjResult(interp, Tcl_ObjPrintf(
			"error reading \"%s\": %s",
			TclGetString(chanObjPtr), Tcl_PosixError(interp)));
	    }






	    code = TCL_ERROR;

	    goto done;
	}
	lineLen = TCL_IO_FAILURE;
    }
    if (objc == 3) {
	if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr,
		TCL_LEAVE_ERR_MSG) == NULL) {







<













>
>
>
>
>
>

>







302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
    }

    TclChannelPreserve(chan);
    TclNewObj(linePtr);
    lineLen = Tcl_GetsObj(chan, linePtr);
    if (lineLen == TCL_IO_FAILURE) {
	if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) {


	    /*
	     * TIP #219.
	     * Capture error messages put by the driver into the bypass area
	     * and put them into the regular interpreter result. Fall back to
	     * the regular message if nothing was found in the bypass.
	     */

	    if (!TclChanCaughtErrorBypass(interp, chan)) {
		Tcl_SetObjResult(interp, Tcl_ObjPrintf(
			"error reading \"%s\": %s",
			TclGetString(chanObjPtr), Tcl_PosixError(interp)));
	    }
	    resultDictPtr = Tcl_NewDictObj();
	    Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1)
	    , linePtr);
	    returnOptsPtr = Tcl_NewDictObj();
	    Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1)
	    , resultDictPtr);
	    code = TCL_ERROR;
	    Tcl_SetReturnOptions(interp, returnOptsPtr);
	    goto done;
	}
	lineLen = TCL_IO_FAILURE;
    }
    if (objc == 3) {
	if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr,
		TCL_LEAVE_ERR_MSG) == NULL) {
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Channel chan;		/* The channel to read from. */
    int newline, i;		/* Discard newline at end? */
    Tcl_WideInt toRead;			/* How many bytes to read? */
    Tcl_Size charactersRead;		/* How many characters were read? */
    int mode;			/* Mode in which channel is opened. */
    Tcl_Obj *resultPtr, *chanObjPtr;

    if ((objc != 2) && (objc != 3)) {
	Interp *iPtr;

    argerror:
	iPtr = (Interp *) interp;
	Tcl_WrongNumArgs(interp, 1, objv, "channelId ?numChars?");







|







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    Tcl_Channel chan;		/* The channel to read from. */
    int newline, i;		/* Discard newline at end? */
    Tcl_WideInt toRead;			/* How many bytes to read? */
    Tcl_Size charactersRead;		/* How many characters were read? */
    int mode;			/* Mode in which channel is opened. */
    Tcl_Obj *resultPtr, *resultDictPtr, *returnOptsPtr, *chanObjPtr;

    if ((objc != 2) && (objc != 3)) {
	Interp *iPtr;

    argerror:
	iPtr = (Interp *) interp;
	Tcl_WrongNumArgs(interp, 1, objv, "channelId ?numChars?");
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448






449

450
451
452
453
454
455
456
	}
    }

    TclNewObj(resultPtr);
    TclChannelPreserve(chan);
    charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0);
    if (charactersRead == TCL_IO_FAILURE) {
	Tcl_DecrRefCount(resultPtr);
	/*
	 * TIP #219.
	 * Capture error messages put by the driver into the bypass area and
	 * put them into the regular interpreter result. Fall back to the
	 * regular message if nothing was found in the bypass.
	 */

	if (!TclChanCaughtErrorBypass(interp, chan)) {
	    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "error reading \"%s\": %s",
		    TclGetString(chanObjPtr), Tcl_PosixError(interp)));
	}






	TclChannelRelease(chan);

	return TCL_ERROR;
    }

    /*
     * If requested, remove the last newline in the channel if at EOF.
     */








<












>
>
>
>
>
>

>







435
436
437
438
439
440
441

442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
	}
    }

    TclNewObj(resultPtr);
    TclChannelPreserve(chan);
    charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0);
    if (charactersRead == TCL_IO_FAILURE) {

	/*
	 * TIP #219.
	 * Capture error messages put by the driver into the bypass area and
	 * put them into the regular interpreter result. Fall back to the
	 * regular message if nothing was found in the bypass.
	 */

	if (!TclChanCaughtErrorBypass(interp, chan)) {
	    Tcl_SetObjResult(interp, Tcl_ObjPrintf(
		    "error reading \"%s\": %s",
		    TclGetString(chanObjPtr), Tcl_PosixError(interp)));
	}
	resultDictPtr = Tcl_NewDictObj();
	Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1)
	    , resultPtr);
	returnOptsPtr = Tcl_NewDictObj();
	Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1)
	    , resultDictPtr);
	TclChannelRelease(chan);
	Tcl_SetReturnOptions(interp, returnOptsPtr);
	return TCL_ERROR;
    }

    /*
     * If requested, remove the last newline in the channel if at EOF.
     */

Changes to generic/tclIOUtil.c.

1729
1730
1731
1732
1733
1734
1735





1736
1737
1738
1739
1740
1741
1742
    if (encodingName == NULL) {
	encodingName = "utf-8";
    }
    if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName)
	    != TCL_OK) {
	Tcl_CloseEx(interp,chan,0);
	return result;





    }

    TclNewObj(objPtr);
    Tcl_IncrRefCount(objPtr);

    /*
     * Read first character of stream to check for utf-8 BOM







>
>
>
>
>







1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
    if (encodingName == NULL) {
	encodingName = "utf-8";
    }
    if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName)
	    != TCL_OK) {
	Tcl_CloseEx(interp,chan,0);
	return result;
    }
    if (Tcl_SetChannelOption(interp, chan, "-profile", "strict")
	    != TCL_OK) {
	Tcl_CloseEx(interp,chan,0);
	return result;
    }

    TclNewObj(objPtr);
    Tcl_IncrRefCount(objPtr);

    /*
     * Read first character of stream to check for utf-8 BOM

Changes to library/http/http.tcl.

1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
	    ##Log socket opened, now fconfigure - token $token
	    set delay [expr {[clock milliseconds] - $pre}]
	    if {$delay > 3000} {
		Log socket delay $delay - token $token
	    }
	    fconfigure $sock -translation {auto crlf} \
			     -buffersize $state(-blocksize)
	    if {[package vsatisfies [package provide Tcl] 9.0-]} {
		fconfigure $sock -profile tcl8
	    }
	    ##Log socket opened, DONE fconfigure - token $token
        }

        Log "Using $sock for $state(socketinfo) - token $token" \
	    [expr {$state(-keepalive)?"keepalive":""}]

        # Code above has set state(sock) $sock







<
<
<







1742
1743
1744
1745
1746
1747
1748



1749
1750
1751
1752
1753
1754
1755
	    ##Log socket opened, now fconfigure - token $token
	    set delay [expr {[clock milliseconds] - $pre}]
	    if {$delay > 3000} {
		Log socket delay $delay - token $token
	    }
	    fconfigure $sock -translation {auto crlf} \
			     -buffersize $state(-blocksize)



	    ##Log socket opened, DONE fconfigure - token $token
        }

        Log "Using $sock for $state(socketinfo) - token $token" \
	    [expr {$state(-keepalive)?"keepalive":""}]

        # Code above has set state(sock) $sock
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179

    # Send data in cr-lf format, but accept any line terminators.
    # Initialisation to {auto *} now done in geturl, KeepSocket and DoneRequest.
    # We are concerned here with the request (write) not the response (read).
    lassign [fconfigure $sock -translation] trRead trWrite
    fconfigure $sock -translation [list $trRead crlf] \
		     -buffersize $state(-blocksize)
    if {[package vsatisfies [package provide Tcl] 9.0-]} {
	fconfigure $sock -profile tcl8
    }

    # The following is disallowed in safe interpreters, but the socket is
    # already in non-blocking mode in that case.

    catch {fconfigure $sock -blocking off}
    set how GET
    if {$isQuery} {







<
<
<







2160
2161
2162
2163
2164
2165
2166



2167
2168
2169
2170
2171
2172
2173

    # Send data in cr-lf format, but accept any line terminators.
    # Initialisation to {auto *} now done in geturl, KeepSocket and DoneRequest.
    # We are concerned here with the request (write) not the response (read).
    lassign [fconfigure $sock -translation] trRead trWrite
    fconfigure $sock -translation [list $trRead crlf] \
		     -buffersize $state(-blocksize)




    # The following is disallowed in safe interpreters, but the socket is
    # already in non-blocking mode in that case.

    catch {fconfigure $sock -blocking off}
    set how GET
    if {$isQuery} {
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
    set tk [namespace tail $token]
    set sock $state(sock)

    #Log ---- $state(socketinfo) >> conn to $token for HTTP response
    lassign [fconfigure $sock -translation] trRead trWrite
    fconfigure $sock -translation [list auto $trWrite] \
		     -buffersize $state(-blocksize)
    if {[package vsatisfies [package provide Tcl] 9.0-]} {
	fconfigure $sock -profile tcl8
    }
    Log ^D$tk begin receiving response - token $token

    coroutine ${token}--EventCoroutine http::Event $sock $token
    if {[info exists state(-handler)] || [info exists state(-progress)]} {
        fileevent $sock readable [list http::EventGateway $sock $token]
    } else {
        fileevent $sock readable ${token}--EventCoroutine







<
<
<







2550
2551
2552
2553
2554
2555
2556



2557
2558
2559
2560
2561
2562
2563
    set tk [namespace tail $token]
    set sock $state(sock)

    #Log ---- $state(socketinfo) >> conn to $token for HTTP response
    lassign [fconfigure $sock -translation] trRead trWrite
    fconfigure $sock -translation [list auto $trWrite] \
		     -buffersize $state(-blocksize)



    Log ^D$tk begin receiving response - token $token

    coroutine ${token}--EventCoroutine http::Event $sock $token
    if {[info exists state(-handler)] || [info exists state(-progress)]} {
        fileevent $sock readable [list http::EventGateway $sock $token]
    } else {
        fileevent $sock readable ${token}--EventCoroutine
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
	    # If we are getting text, set the incoming channel's encoding
	    # correctly.  iso8859-1 is the RFC default, but this could be any
	    # IANA charset.  However, we only know how to convert what we have
	    # encodings for.

	    set enc [CharsetToEncoding $state(charset)]
	    if {$enc ne "binary"} {
		if {[package vsatisfies [package provide Tcl] 9.0-]} {
		    set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)]
		} else {
		    set state(body) [encoding convertfrom $enc $state(body)]
		}
	    }

	    # Translate text line endings.
	    set state(body) [string map {\r\n \n \r \n} $state(body)]
	}
	if {[info exists state(-guesstype)] && $state(-guesstype)} {
	    GuessType $token







<
<
<
|
<







4541
4542
4543
4544
4545
4546
4547



4548

4549
4550
4551
4552
4553
4554
4555
	    # If we are getting text, set the incoming channel's encoding
	    # correctly.  iso8859-1 is the RFC default, but this could be any
	    # IANA charset.  However, we only know how to convert what we have
	    # encodings for.

	    set enc [CharsetToEncoding $state(charset)]
	    if {$enc ne "binary"} {



		set state(body) [encoding convertfrom $enc $state(body)]

	    }

	    # Translate text line endings.
	    set state(body) [string map {\r\n \n \r \n} $state(body)]
	}
	if {[info exists state(-guesstype)] && $state(-guesstype)} {
	    GuessType $token
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
            set res $value
        }
    }
    set enc [CharsetToEncoding $res]
    if {$enc eq "binary"} {
        return 0
    }
    if {[package vsatisfies [package provide Tcl] 9.0-]} {
	set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)]
    } else {
	set state(body) [encoding convertfrom $enc $state(body)]
    }
    set state(body) [string map {\r\n \n \r \n} $state(body)]
    set state(type) application/xml
    set state(binary) 0
    set state(charset) $res
    return 1
}








<
<
<
|
<







4624
4625
4626
4627
4628
4629
4630



4631

4632
4633
4634
4635
4636
4637
4638
            set res $value
        }
    }
    set enc [CharsetToEncoding $res]
    if {$enc eq "binary"} {
        return 0
    }



    set state(body) [encoding convertfrom $enc $state(body)]

    set state(body) [string map {\r\n \n \r \n} $state(body)]
    set state(type) application/xml
    set state(binary) 0
    set state(charset) $res
    return 1
}

4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
    variable http
    variable formMap

    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
    # a pre-computed map and [string map] to do the conversion (much faster
    # than [regsub]/[subst]). [Bug 1020491]

    if {[package vsatisfies [package provide Tcl] 9.0-]} {
	set string [encoding convertto -profile tcl8 $http(-urlencoding) $string]
    } else {
	set string [encoding convertto $http(-urlencoding) $string]
    }
    return [string map $formMap $string]
}

# http::ProxyRequired --
#	Default proxy filter.
#
# Arguments:







<
<
<
|
<







4705
4706
4707
4708
4709
4710
4711



4712

4713
4714
4715
4716
4717
4718
4719
    variable http
    variable formMap

    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
    # a pre-computed map and [string map] to do the conversion (much faster
    # than [regsub]/[subst]). [Bug 1020491]




    set string [encoding convertto $http(-urlencoding) $string]

    return [string map $formMap $string]
}

# http::ProxyRequired --
#	Default proxy filter.
#
# Arguments:

Changes to library/tcltest/tcltest.tcl.

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
	    stderr -
	    stdout {
		set outputChannel $filename
	    }
	    default {
		set outputChannel [open $filename a]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $outputChannel -profile tcl8 -encoding utf-8
		}
		set ChannelsWeOpened($outputChannel) 1

		# If we created the file in [temporaryDirectory], then
		# [cleanupTests] will delete it, unless we claim it was
		# already there.
		set outdir [normalizePath [file dirname \







|







396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
	    stderr -
	    stdout {
		set outputChannel $filename
	    }
	    default {
		set outputChannel [open $filename a]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $outputChannel -encoding utf-8
		}
		set ChannelsWeOpened($outputChannel) 1

		# If we created the file in [temporaryDirectory], then
		# [cleanupTests] will delete it, unless we claim it was
		# already there.
		set outdir [normalizePath [file dirname \
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
	    stderr -
	    stdout {
		set errorChannel $filename
	    }
	    default {
		set errorChannel [open $filename a]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $errorChannel -profile tcl8 -encoding utf-8
		}
		set ChannelsWeOpened($errorChannel) 1

		# If we created the file in [temporaryDirectory], then
		# [cleanupTests] will delete it, unless we claim it was
		# already there.
		set outdir [normalizePath [file dirname \







|







443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
	    stderr -
	    stdout {
		set errorChannel $filename
	    }
	    default {
		set errorChannel [open $filename a]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $errorChannel -encoding utf-8
		}
		set ChannelsWeOpened($errorChannel) 1

		# If we created the file in [temporaryDirectory], then
		# [cleanupTests] will delete it, unless we claim it was
		# already there.
		set outdir [normalizePath [file dirname \
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
	return [AcceptReadable $file]
    }
    proc ReadLoadScript {args} {
	variable Option
	if {$Option(-loadfile) eq {}} {return}
	set tmp [open $Option(-loadfile) r]
	if {[package vsatisfies [package provide Tcl] 8.7-]} {
	    fconfigure $tmp -profile tcl8 -encoding utf-8
	}
	loadScript [read $tmp]
	close $tmp
    }
    Option -loadfile {} {
	Read the script to load the tested commands from the specified file.
    } AcceptLoadFile loadFile







|







788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
	return [AcceptReadable $file]
    }
    proc ReadLoadScript {args} {
	variable Option
	if {$Option(-loadfile) eq {}} {return}
	set tmp [open $Option(-loadfile) r]
	if {[package vsatisfies [package provide Tcl] 8.7-]} {
	    fconfigure $tmp -encoding utf-8
	}
	loadScript [read $tmp]
	close $tmp
    }
    Option -loadfile {} {
	Read the script to load the tested commands from the specified file.
    } AcceptLoadFile loadFile
1129
1130
1131
1132
1133
1134
1135

1136
1137
1138
1139
1140
1141
1142
    if {$n2 eq {}} {return}
    if {![info exists testConstraints($n2)]} {
	if {[catch {testConstraint $n2 [eval [ConstraintInitializer $n2]]}]} {
	    testConstraint $n2 0
	}
    }
}


# tcltest::Asciify --
#
#       Transforms the passed string to contain only printable ascii characters.
#       Useful for printing to terminals. Non-printables are mapped to
#       \x, \u or \U sequences.
#







>







1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
    if {$n2 eq {}} {return}
    if {![info exists testConstraints($n2)]} {
	if {[catch {testConstraint $n2 [eval [ConstraintInitializer $n2]]}]} {
	    testConstraint $n2 0
	}
    }
}


# tcltest::Asciify --
#
#       Transforms the passed string to contain only printable ascii characters.
#       Useful for printing to terminals. Non-printables are mapped to
#       \x, \u or \U sequences.
#
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
	set code
    }

    ConstraintInitializer stdio {
	set code 0
	if {![catch {set f [open "|[list [interpreter]]" w]}]} {
	    if {[package vsatisfies [package provide Tcl] 8.7-]} {
		fconfigure $f -profile tcl8 -encoding utf-8
	    }
	    if {![catch {puts $f exit}]} {
		if {![catch {close $f}]} {
		    set code 1
		}
	    }
	}







|







1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
	set code
    }

    ConstraintInitializer stdio {
	set code 0
	if {![catch {set f [open "|[list [interpreter]]" w]}]} {
	    if {[package vsatisfies [package provide Tcl] 8.7-]} {
		fconfigure $f -encoding utf-8
	    }
	    if {![catch {puts $f exit}]} {
		if {![catch {close $f}]} {
		    set code 1
		}
	    }
	}
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
	    set testFile [dict get $testFrame file]
	    set testLine [dict get $testFrame line]
	} else {
	    set testFile [file normalize [uplevel 1 {info script}]]
	    if {[file readable $testFile]} {
		set testFd [open $testFile r]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $testFd -profile tcl8 -encoding utf-8
		}
		set testLine [expr {[lsearch -regexp \
			[split [read $testFd] "\n"] \
			"^\[ \t\]*test [string map {. \\.} $name] "] + 1}]
		close $testFd
	    }
	}







|







2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
	    set testFile [dict get $testFrame file]
	    set testLine [dict get $testFrame line]
	} else {
	    set testFile [file normalize [uplevel 1 {info script}]]
	    if {[file readable $testFile]} {
		set testFd [open $testFile r]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $testFd -encoding utf-8
		}
		set testLine [expr {[lsearch -regexp \
			[split [read $testFd] "\n"] \
			"^\[ \t\]*test [string map {. \\.} $name] "] + 1}]
		close $testFd
	    }
	}
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
	    puts [outputChannel] "---- errorCode(setup): $errorCodeRes(setup)"
	}
    }
    if {$processTest && $scriptFailure} {
	if {$scriptCompare} {
	    puts [outputChannel] "---- Error testing result: $scriptMatch"
	} else {
	    if {[catch {
		puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]"
	    } errMsg]} {
		puts [outputChannel] "\n---- Result was:\n<error printing result: $errMsg>"
	    }
	    puts [outputChannel] "---- Result should have been\
		    ($match matching):\n[Asciify $result]"
	}
    }
    if {$errorCodeFailure} {
	puts [outputChannel] "---- Error code was: '$errorCodeRes(body)'"
	puts [outputChannel] "---- Error code should have been: '$errorCode'"







<
|
<
<
<







2249
2250
2251
2252
2253
2254
2255

2256



2257
2258
2259
2260
2261
2262
2263
	    puts [outputChannel] "---- errorCode(setup): $errorCodeRes(setup)"
	}
    }
    if {$processTest && $scriptFailure} {
	if {$scriptCompare} {
	    puts [outputChannel] "---- Error testing result: $scriptMatch"
	} else {

	    puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]"



	    puts [outputChannel] "---- Result should have been\
		    ($match matching):\n[Asciify $result]"
	}
    }
    if {$errorCodeFailure} {
	puts [outputChannel] "---- Error code was: '$errorCodeRes(body)'"
	puts [outputChannel] "---- Error code should have been: '$errorCode'"
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
		lappend childargv $opt $value
	    }
	    set cmd [linsert $childargv 0 | $shell $file]
	    if {[catch {
		incr numTestFiles
		set pipeFd [open $cmd "r"]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $pipeFd -profile tcl8 -encoding utf-8
		}
		while {[gets $pipeFd line] >= 0} {
		    if {[regexp [join {
			    {^([^:]+):\t}
			    {Total\t([0-9]+)\t}
			    {Passed\t([0-9]+)\t}
			    {Skipped\t([0-9]+)\t}







|







2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
		lappend childargv $opt $value
	    }
	    set cmd [linsert $childargv 0 | $shell $file]
	    if {[catch {
		incr numTestFiles
		set pipeFd [open $cmd "r"]
		if {[package vsatisfies [package provide Tcl] 8.7-]} {
		    fconfigure $pipeFd -encoding utf-8
		}
		while {[gets $pipeFd line] >= 0} {
		    if {[regexp [join {
			    {^([^:]+):\t}
			    {Total\t([0-9]+)\t}
			    {Passed\t([0-9]+)\t}
			    {Skipped\t([0-9]+)\t}
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146

    DebugPuts 3 "[lindex [info level 0] 0]:\
	     putting ``$contents'' into $fullName"

    set fd [open $fullName w]
    fconfigure $fd -translation lf
    if {[package vsatisfies [package provide Tcl] 8.7-]} {
	fconfigure $fd -profile tcl8 -encoding utf-8
    }
    if {[string index $contents end] eq "\n"} {
	puts -nonewline $fd $contents
    } else {
	puts $fd $contents
    }
    close $fd







|







3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143

    DebugPuts 3 "[lindex [info level 0] 0]:\
	     putting ``$contents'' into $fullName"

    set fd [open $fullName w]
    fconfigure $fd -translation lf
    if {[package vsatisfies [package provide Tcl] 8.7-]} {
	fconfigure $fd -encoding utf-8
    }
    if {[string index $contents end] eq "\n"} {
	puts -nonewline $fd $contents
    } else {
	puts $fd $contents
    }
    close $fd
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
    FillFilesExisted
    if {[llength [info level 0]] == 2} {
	set directory [temporaryDirectory]
    }
    set fullName [file join $directory $name]
    set f [open $fullName]
    if {[package vsatisfies [package provide Tcl] 8.7-]} {
	fconfigure $f -profile tcl8 -encoding utf-8
    }
    set data [read -nonewline $f]
    close $f
    return $data
}

# tcltest::bytestring --







|







3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
    FillFilesExisted
    if {[llength [info level 0]] == 2} {
	set directory [temporaryDirectory]
    }
    set fullName [file join $directory $name]
    set f [open $fullName]
    if {[package vsatisfies [package provide Tcl] 8.7-]} {
	fconfigure $f -encoding utf-8
    }
    set data [read -nonewline $f]
    close $f
    return $data
}

# tcltest::bytestring --

Changes to tests/cmdAH.test.

330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
    set system [encoding system]
} -body {
    encoding system iso8859-1
    encoding system
} -cleanup {
    encoding system $system
} -result iso8859-1

#
# encoding convertfrom 4.3.*

# Odd number of args is always invalid since last two args
# are ENCODING DATA and all options take a value
badnumargs cmdAH-4.3.1 {encoding convertfrom} {}
badnumargs cmdAH-4.3.2 {encoding convertfrom} {-failindex VAR ABC}







<







330
331
332
333
334
335
336

337
338
339
340
341
342
343
    set system [encoding system]
} -body {
    encoding system iso8859-1
    encoding system
} -cleanup {
    encoding system $system
} -result iso8859-1

#
# encoding convertfrom 4.3.*

# Odd number of args is always invalid since last two args
# are ENCODING DATA and all options take a value
badnumargs cmdAH-4.3.1 {encoding convertfrom} {}
badnumargs cmdAH-4.3.2 {encoding convertfrom} {-failindex VAR ABC}

Changes to tests/encoding.test.

775
776
777
778
779
780
781
782
783



784
785
786
787
788
789
790
791
    fconfigure $f -encoding iso2022-jp
    set count [gets $f line]
    close $f
    removeFile iso2022.tcl
    list $count [viewable $line]
} [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"]

test encoding-24.4 {Parse valid or invalid utf-8} {
    string length [encoding convertfrom utf-8 "\xC0\x80"]



} 1
test encoding-24.5 {Parse valid or invalid utf-8} {
    string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
    string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {







|
|
>
>
>
|







775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
    fconfigure $f -encoding iso2022-jp
    set count [gets $f line]
    close $f
    removeFile iso2022.tcl
    list $count [viewable $line]
} [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"]

test {encoding-24.4 utf-8 invalid strict} {Parse invalid utf-8, strict} -body {
    string length [encoding convertfrom -profile strict utf-8 "\xC0\x80"]
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
test {encoding-24.4 utf-8 invalid tcl8} {UtfToUtfProc utf-8} {
    encoding convertfrom -profile tcl8 utf-8 \xC0\x80
} \x00
test encoding-24.5 {Parse valid or invalid utf-8} {
    string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
    string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {
808
809
810
811
812
813
814



815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834



835
836
837
838
839
840
841
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
test encoding-24.13 {Parse invalid utf-8} -body {
    encoding convertfrom -profile strict utf-8 "\xC1\xBF"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
test encoding-24.14 {Parse valid utf-8} {
    expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"}
} 1



test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body {
    encoding convertfrom -profile strict utf-8 "Z\xE0\x80"
} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'"
test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body {
    encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80"
} -result Z\xE0\u20AC
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
} -result "Z\xC3\xA0\xE2\x82\xAC"
test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
test encoding-24.19.1 {Parse valid or invalid utf-8} -body {
    encoding convertto -profile tcl8 utf-8 "ZX\uD800"
} -result ZX\xED\xA0\x80
test encoding-24.19.2 {Parse valid or invalid utf-8} -body {
    encoding convertto -profile strict utf-8 "ZX\uD800"



} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body {
    encoding convertfrom -profile tcl8 "\x20"
} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error
test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body {
    string length [encoding convertto -profile tcl8 "\x20"]
} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error







>
>
>




















>
>
>







811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
test encoding-24.13 {Parse invalid utf-8} -body {
    encoding convertfrom -profile strict utf-8 "\xC1\xBF"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
test encoding-24.14 {Parse valid utf-8} {
    expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"}
} 1
test encoding-24.15.default {Parse invalid utf-8, default} -body {
    encoding convertfrom -profile strict utf-8 "Z\xE0\x80"
} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'"
test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body {
    encoding convertfrom -profile strict utf-8 "Z\xE0\x80"
} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'"
test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body {
    encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80"
} -result Z\xE0\u20AC
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
} -result "Z\xC3\xA0\xE2\x82\xAC"
test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
    encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
test encoding-24.19.1 {Parse valid or invalid utf-8} -body {
    encoding convertto -profile tcl8 utf-8 "ZX\uD800"
} -result ZX\xED\xA0\x80
test encoding-24.19.2 {Parse valid or invalid utf-8} -body {
    encoding convertto -profile strict utf-8 "ZX\uD800"
} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
test encoding-24.19.3 {Parse valid or invalid utf-8} -body {
    encoding convertto utf-8 "ZX\uD800"
} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body {
    encoding convertfrom -profile tcl8 "\x20"
} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error
test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body {
    string length [encoding convertto -profile tcl8 "\x20"]
} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
} -result \uFFFF
test encoding-24.30 {Parse noncharacter with -profile strict} -body {
    encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body {
    encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.32 {Try to generate invalid utf-8} -body {
    encoding convertto utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.33 {Try to generate invalid utf-8} -body {
    encoding convertto -profile strict utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body {
    encoding convertto -profile tcl8 utf-8 \uFFFF







|







874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
} -result \uFFFF
test encoding-24.30 {Parse noncharacter with -profile strict} -body {
    encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body {
    encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF
} -result \uFFFF
test encoding-24.33 {Try to generate invalid utf-8} -body {
    encoding convertto utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.33 {Try to generate invalid utf-8} -body {
    encoding convertto -profile strict utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body {
    encoding convertto -profile tcl8 utf-8 \uFFFF

Changes to tests/encodingVectors.tcl.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# This file contains test vectors for verifying various encodings. They are
# stored in a common file so that they can be sourced into the various test
# modules that are dependent on encodings. This file contains statically defined
# test vectors. In addition, it sources the ICU-generated test vectors from
# icuUcmTests.tcl.
#
# Note that sourcing the file will reinitialize any existing encoding test
# vectors.
#

# List of defined encoding profiles
set encProfiles {tcl8 strict replace}
set encDefaultProfile tcl8; # Should reflect the default from implementation

# encValidStrings - Table of valid strings.
#
# Each row is <ENCODING STR BYTES CTRL COMMENT>
# The pair <ENCODING,STR> should be unique for generated test ids to be unique.
# STR is a string that can be encoded in the encoding ENCODING resulting
# in the byte sequence BYTES. The CTRL field is a list that controls test












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# This file contains test vectors for verifying various encodings. They are
# stored in a common file so that they can be sourced into the various test
# modules that are dependent on encodings. This file contains statically defined
# test vectors. In addition, it sources the ICU-generated test vectors from
# icuUcmTests.tcl.
#
# Note that sourcing the file will reinitialize any existing encoding test
# vectors.
#

# List of defined encoding profiles
set encProfiles {tcl8 strict replace}
set encDefaultProfile strict; # Should reflect the default from implementation

# encValidStrings - Table of valid strings.
#
# Each row is <ENCODING STR BYTES CTRL COMMENT>
# The pair <ENCODING,STR> should be unique for generated test ids to be unique.
# STR is a string that can be encoded in the encoding ENCODING resulting
# in the byte sequence BYTES. The CTRL field is a list that controls test

Changes to tests/io.test.

1610
1611
1612
1613
1614
1615
1616








































1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629

1630
1631
1632
1633
1634
1635
1636
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -buffersize 10
    set in [read $f]
    close $f
    scan [string index $in end] %c
} 160








































test {io-12.9 profile tcl8} {ReadChars: multibyte chars split} -body {
    set f [open $path(test1) w]
    fconfigure $f -translation binary
    puts -nonewline $f [string repeat a 9]\xC2
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10
    set in [read $f]
    read $f
    scan [string index $in end] %c
} -cleanup {
    catch {close $f}
} -result 194

test {io-12.10 strict} {ReadChars: multibyte chars split} -body {
    set f [open $path(test1) w]
    fconfigure $f -translation binary
    puts -nonewline $f [string repeat a 9]\xC2
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -profile strict -buffersize 10







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













>







1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -buffersize 10
    set in [read $f]
    close $f
    scan [string index $in end] %c
} 160


apply [list {} {
    set template {
	test {io-12.9 @variant@} {ReadChars: multibyte chars split, default (strict)} -body {
	    set res {}
	    set f [open $path(test1) w]
	    fconfigure $f -translation binary
	    puts -nonewline $f [string repeat a 9]\xC2
	    close $f
	    set f [open $path(test1)]
	    fconfigure $f -encoding utf-8 @strict@ -buffersize 10
	    set status [catch {read $f} cres copts]
	    set in [dict get $copts -result]
	    lappend res $in
	    lappend res $status $cres
	    set status [catch {read $f} cres copts]
	    set in [dict get $copts -result]
	    lappend res $in
	    lappend res $status $cres
	    set res
	} -cleanup {
	    catch {close $f}
	} -match glob -result {{read aaaaaaaaa} 1 {error reading "file*":\
		invalid or incomplete multibyte or wide character}\
		{read {}} 1 {error reading "file*":\
		invalid or incomplete multibyte or wide character}}
    }

	# if default encoding is not currently to strict
    # foreach variant {default encodingstrict} strict {{} {-encodingstrict 1}} 
    foreach variant {{profile strict}} strict {{-profile strict}} {
	set script [string map [
	    list @variant@ $variant @strict@ $strict] $template] 
	uplevel 1 $script
    }
} [namespace current]]



test {io-12.9 profile tcl8} {ReadChars: multibyte chars split} -body {
    set f [open $path(test1) w]
    fconfigure $f -translation binary
    puts -nonewline $f [string repeat a 9]\xC2
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10
    set in [read $f]
    read $f
    scan [string index $in end] %c
} -cleanup {
    catch {close $f}
} -result 194

test {io-12.10 strict} {ReadChars: multibyte chars split} -body {
    set f [open $path(test1) w]
    fconfigure $f -translation binary
    puts -nonewline $f [string repeat a 9]\xC2
    close $f
    set f [open $path(test1)]
    fconfigure $f -encoding utf-8 -profile strict -buffersize 10
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888
5889
    set l [list]
    set sock [socket -server [namespace code accept] -myaddr 127.0.0.1 0]
    lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation]
    close $sock
    set l
} {{} auto}
test io-39.24 {Tcl_SetChannelOption, server socket is not readable or
    writable so we can't change -eofchar or -translation } {
    set l [list]
    set sock [socket -server [namespace code accept] -myaddr 127.0.0.1 0]
    fconfigure $sock -eofchar D -translation lf
    lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation]
    close $sock
    set l
} {{} auto}







|







5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
    set l [list]
    set sock [socket -server [namespace code accept] -myaddr 127.0.0.1 0]
    lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation]
    close $sock
    set l
} {{} auto}
test io-39.24 {Tcl_SetChannelOption, server socket is not readable or
	writable so we can't change -eofchar or -translation } {
    set l [list]
    set sock [socket -server [namespace code accept] -myaddr 127.0.0.1 0]
    fconfigure $sock -eofchar D -translation lf
    lappend l [fconfigure $sock -eofchar] [fconfigure $sock -translation]
    close $sock
    set l
} {{} auto}
9276
9277
9278
9279
9280
9281
9282






















9283
9284
9285
9286
9287
9288
9289
9290
9291
9292
9293
9294
9295
9296
9297
9298
9299
9300
9301
9302
9303
9304
9305
9306
9307
9308
9309
9310
9311
9312
9313
9314
9315
9316
9317
9318
9319
9320
9321





























9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335

9336
9337
9338
9339
9340
9341
9342































9343

9344
9345
9346
9347
9348
9349
9350
    binary scan $d H* hd
    set hd
} -cleanup {
    close $f
    removeFile io-75.5
} -result 4181























test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is an incomplete byte sequence in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	    -translation lf -profile strict
} -body {
    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "file*":\
	invalid or incomplete multibyte or wide character}

test io-75.7 {
    invalid utf-8 encoding gets is not ignored (-profile strict)
} -setup {
    set fn [makeFile {} io-75.7]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \
	    -profile strict
} -body {
    list [catch {read $f} msg] $msg
} -cleanup {
    close $f
    removeFile io-75.7
} -match glob -result {1 {error reading "file*":\
    invalid or incomplete multibyte or wide character}}

test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup {





























    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes
    # precedence.
    puts -nonewline $f A\x1A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [eof $f]

    lappend hd [read $f]
    close $f
    set hd
} -cleanup {
    removeFile io-75.8
} -result {41 1 {}}
































test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup {

    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    # This also configures the channel encoding profile as strict.
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later.
    puts -nonewline $f A\x81\x81\x1A
    flush $f







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

















|



















|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
|









>







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>







9317
9318
9319
9320
9321
9322
9323
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
9368
9369
9370
9371
9372
9373
9374
9375
9376
9377
9378
9379
9380
9381
9382
9383
9384
9385
9386
9387
9388
9389
9390
9391
9392
9393
9394
9395
9396
9397
9398
9399
9400
9401
9402
9403
9404
9405
9406
9407
9408
9409
9410
9411
9412
9413
9414
9415
9416
9417
9418
9419
9420
9421
9422
9423
9424
9425
9426
9427
9428
9429
9430
9431
9432
9433
9434
9435
9436
9437
9438
9439
9440
9441
9442
9443
9444
9445
9446
9447
9448
9449
9450
9451
9452
9453
9454
9455
9456
9457
9458
9459
9460
9461
9462
9463
9464
9465
9466
9467
9468
9469
9470
9471
9472
9473
9474
9475
    binary scan $d H* hd
    set hd
} -cleanup {
    close $f
    removeFile io-75.5
} -result 4181

test io-75.6.read {invalid utf-8 encoding, read is not ignored (-encodingstrict 1)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf \
	-profile strict
} -body {
    set status [catch {read $f} cres copts]
    set d [dict get $copts -result read]
    binary scan $d H* hd
    lappend hd $status $cres
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -result {41 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character}}


test io-75.6.gets {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup {
    set fn [makeFile {} io-75.6]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is an incomplete byte sequence in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	    -translation lf -profile strict
} -body {
    gets $f
} -cleanup {
    close $f
    removeFile io-75.6
} -match glob -returnCodes 1 -result {error reading "file*":\
	invalid or incomplete multibyte or wide character}

test io-75.7.gets {
    invalid utf-8 encoding gets is not ignored (-profile strict)
} -setup {
    set fn [makeFile {} io-75.7]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \
	    -profile strict
} -body {
    list [catch {read $f} msg] $msg
} -cleanup {
    close $f
    removeFile io-75.7
} -match glob -result {1 {error reading "file*":\
    invalid or incomplete multibyte or wide character}}

test io-75.7.read {invalid utf-8 encoding eof handling (-profile strict)} -setup {
    set fn [makeFile {} io-75.7]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later.
    puts -nonewline $f A\xA1\x1A
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {
    set status [catch {read $f} cres copts]
    set d [dict get $copts -result read]
    binary scan $d H* hd
    lappend hd [eof $f]
    lappend hd $status
    lappend hd $cres
    fconfigure $f -encoding iso8859-1
    lappend hd [read $f];# We changed encoding, so now we can read the \xA1
    close $f
    set hd
} -cleanup {
    removeFile io-75.7
} -match glob -result {41 0 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character} ¡}

test io-75.8.incomplete {
    incomplete uft-8 char after eof char is not an error (-profile strict)
} -setup {
    set hd {}
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    fconfigure $f -encoding binary
	# \x81 is invalid in utf-8, but since the eof character \x1A comes first,
	# -eofchar takes precedence.
    puts -nonewline $f A\x1A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {
    set d [read $f]
    binary scan $d H* hd
    lappend hd [eof $f]
    # there should be no error on additional reads
    lappend hd [read $f]
    close $f
    set hd
} -cleanup {
    removeFile io-75.8
} -result {41 1 {}}


test {io-75.8 {invalid after eof}} {
	invalid utf-8 after eof char is not an error (-profile strict)
} -setup {
    set res {}
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A
    # appears first, it's not an error.
    puts -nonewline $f A\x1a\xc0\x80
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \
	-translation lf -profile strict
} -body {
    set d [read $f]
    foreach char [split $d {}] {
	lappend res [format %x [scan $char %c]]
    }
    lappend res [eof $f]
    # there should be no error on additional reads
    lappend res [read $f]
    close $f
    set res
} -cleanup {
    removeFile io-75.8
} -result {41 1 {}}


test {io-75.8 {invalid before eof}} {
	invalid utf-8 encoding eof handling (-profile strict)
} -setup {
    set fn [makeFile {} io-75.8]
    set f [open $fn w+]
    # This also configures the channel encoding profile as strict.
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later.
    puts -nonewline $f A\x81\x81\x1A
    flush $f
9381
9382
9383
9384
9385
9386
9387

9388
9389
9390
9391
9392
9393
9394
9395
9396
9397
9398
9399
9400
9401
9402
9403























9404












9405
9406
9407
9408
9409
9410
9411
} -body {
    list [catch {read $chan 1} cres] $cres
} -cleanup {
    close $chan
    unset res
} -match glob -result {1 {error reading "*":\
    invalid or incomplete multibyte or wide character}}


test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup {
    set fn [makeFile {} io-75.9]
    set f [open $fn w+]
    fconfigure $f -encoding iso8859-1 -profile strict
} -body {
    catch {puts -nonewline $f "A\u2022"} msg
    flush $f
    seek $f 0
    list [read $f] $msg
} -cleanup {
    close $f
    removeFile io-75.9
} -match glob -result [list {A} {error writing "*":\
    invalid or incomplete multibyte or wide character}]
























test io-75.10 {












    incomplete multibyte encoding read is not ignored because "binary" sets
    profile to strict
} -setup {
    set res {}
    set fn [makeFile {} io-75.10]
    set f [open $fn w+]
    fconfigure $f -encoding binary







>
















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>







9506
9507
9508
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518
9519
9520
9521
9522
9523
9524
9525
9526
9527
9528
9529
9530
9531
9532
9533
9534
9535
9536
9537
9538
9539
9540
9541
9542
9543
9544
9545
9546
9547
9548
9549
9550
9551
9552
9553
9554
9555
9556
9557
9558
9559
9560
9561
9562
9563
9564
9565
9566
9567
9568
9569
9570
9571
9572
} -body {
    list [catch {read $chan 1} cres] $cres
} -cleanup {
    close $chan
    unset res
} -match glob -result {1 {error reading "*":\
    invalid or incomplete multibyte or wide character}}


test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup {
    set fn [makeFile {} io-75.9]
    set f [open $fn w+]
    fconfigure $f -encoding iso8859-1 -profile strict
} -body {
    catch {puts -nonewline $f "A\u2022"} msg
    flush $f
    seek $f 0
    list [read $f] $msg
} -cleanup {
    close $f
    removeFile io-75.9
} -match glob -result [list {A} {error writing "*":\
    invalid or incomplete multibyte or wide character}]

apply [list {} {
    set template {
	test {io-75.10 ${mode}} {
	    incomplete multibyte encoding read is an error
	} -setup {
	    set res {}
	    set fn [makeFile {} io-75.10]
	    set f [open $fn w+]
	    fconfigure $f -encoding binary
	    puts -nonewline $f A\xC0
	    flush $f
	    seek $f 0
	    fconfigure $f -encoding utf-8 -buffering none {*}${option}
	} -body {
	    set status [catch {read $f} cres copts]
	    set d [dict get $copts -result read]
	    close $f
	    binary scan $d H* hd
	    lappend res $hd
	    lappend res $status
	    lappend res $cres
	    return $res
	} -cleanup {
	    removeFile io-75.10
	} -match glob -result {41 1 {error reading "file*":\
		invalid or incomplete multibyte or wide character}}
    }
	# the default encoding mode is not currently strict
    #foreach mode {default strict} option {{} {-encodingstrict 1}} 
    foreach mode {{profile strict}} option {{-profile strict}} {
	set test [string map [
	    list {${mode}} [list $mode] {${option}} [list $option]] $template]
	uplevel $test
    }
} [namespace current]]
test {io-75.10 {profile tcl8}} {
    incomplete multibyte encoding read is not ignored because "binary" sets
    profile to strict
} -setup {
    set res {}
    set fn [makeFile {} io-75.10]
    set f [open $fn w+]
    fconfigure $f -encoding binary
9451
9452
9453
9454
9455
9456
9457





















9458















9459
9460
9461
9462
9463
9464
9465
9466
9467
9468
9469

9470
9471
9472
9473
9474
9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485























9486
9487
9488
9489
9490
9491
9492
    lappend hd [catch {set d [read $f]} msg] $msg
} -cleanup {
    close $f
    removeFile io-75.11
} -match glob -result {41 1 {error reading "file*":\
    invalid or incomplete multibyte or wide character}}






















test io-75.12 {















    invalid utf-8 encoding read is not ignored because setting the encoding to
    "binary" also set the profile to strict
} -setup {
    set res {}
    set fn [makeFile {} io-75.12]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf

} -body {
    catch {read $f} errmsg
    lappend res $errmsg
    chan configure $f -profile tcl8
    seek $f 0
    set d [read $f]
    binary scan $d H* hd
    lappend res $hd
    return $res
} -cleanup {
    close $f
    removeFile io-75.12
    unset res
} -match glob -result {{error reading "file*":\
    invalid or incomplete multibyte or wide character} 4181}
test io-75.13 {























    In nonblocking mode when there is an encoding error the data that has been
    successfully read so far is returned first and then the error is returned
    on the next call to [read].
} -setup {
    set fn [makeFile {} io-75.13]
    set f [open $fn w+]
    fconfigure $f -encoding binary







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|








|
>
















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9612
9613
9614
9615
9616
9617
9618
9619
9620
9621
9622
9623
9624
9625
9626
9627
9628
9629
9630
9631
9632
9633
9634
9635
9636
9637
9638
9639
9640
9641
9642
9643
9644
9645
9646
9647
9648
9649
9650
9651
9652
9653
9654
9655
9656
9657
9658
9659
9660
9661
9662
9663
9664
9665
9666
9667
9668
9669
9670
9671
9672
9673
9674
9675
9676
9677
9678
9679
9680
9681
9682
9683
9684
9685
9686
9687
9688
9689
9690
9691
9692
9693
9694
9695
9696
9697
9698
9699
9700
9701
9702
9703
9704
9705
9706
9707
9708
9709
9710
9711
9712
9713
    lappend hd [catch {set d [read $f]} msg] $msg
} -cleanup {
    close $f
    removeFile io-75.11
} -match glob -result {41 1 {error reading "file*":\
    invalid or incomplete multibyte or wide character}}


apply [list {} {
    set template {
	test {io-75.12 ${mode}} {invalid utf-8 encoding read returns an error} -setup {
	    set res {}
	    set fn [makeFile {} io-75.12]
	    set f [open $fn w+]
	    fconfigure $f -encoding binary
	    puts -nonewline $f A\x81
	    flush $f
	    seek $f 0
	    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
		    -translation lf {*}${option}
	} -body {
	    set status [catch {read $f} cres copts]
	    set d [dict get $copts -result read]
	    close $f
	    binary scan $d H* hd
	    lappend res $hd $status $cres
	    return $res
	} -cleanup {
	    removeFile io-75.12
	} -match glob -result {41 1 {error reading "file*":\
		invalid or incomplete multibyte or wide character}}
    }

    # the default encoding mod is not currently strict
    #foreach mode {default strict} option {{} {-encodingstrict 1}}
    foreach mode {{profile strict}} option {{-profile strict}} {
	set test [string map [
	    list {${mode}} [list $mode] {${option}} [list $option]] $template]
	uplevel $test
    }
} [namespace current]]


test {io-75.12 {profile tcl8}} {
    invalid utf-8 encoding read, is not ignored because setting the encoding to
    "binary" also sets the profile to strict
} -setup {
    set res {}
    set fn [makeFile {} io-75.12]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} \
	-translation lf
} -body {
    catch {read $f} errmsg
    lappend res $errmsg
    chan configure $f -profile tcl8
    seek $f 0
    set d [read $f]
    binary scan $d H* hd
    lappend res $hd
    return $res
} -cleanup {
    close $f
    removeFile io-75.12
    unset res
} -match glob -result {{error reading "file*":\
    invalid or incomplete multibyte or wide character} 4181}
test io-75.13 {
    In blocking mode [read] produces an error and leaves the data succesfully
    read so far in the return options dictionary.
} -setup {
    set fn [makeFile {} io-75.13]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \x81 is invalid in utf-8
    puts -nonewline $f A\x81
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -eofchar "" -translation lf -profile strict
} -body {
    set status [catch {read $f} cres copts]
    set d [dict get $copts -result read]
    binary scan $d H* hd
    lappend hd $status
    lappend hd $cres
} -cleanup {
    close $f
    removeFile io-75.13
} -match glob -result {41 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character}}
test io-75.13.nonblocking {
    In nonblocking mode when there is an encoding error the data that has been
    successfully read so far is returned first and then the error is returned
    on the next call to [read].
} -setup {
    set fn [makeFile {} io-75.13]
    set f [open $fn w+]
    fconfigure $f -encoding binary
9559
9560
9561
9562
9563
9564
9565























































9566
9567
9568
9569
9570
9571
9572
	lappend res [gets $chan]
	lappend res [gets $chan]
    return $res
} -cleanup {
    close $chan
} -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\
    1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI}
























































# ### ### ### ######### ######### #########



test io-76.0 {channel modes} -setup {
    set datafile [makeFile {some characters} dummy]







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9780
9781
9782
9783
9784
9785
9786
9787
9788
9789
9790
9791
9792
9793
9794
9795
9796
9797
9798
9799
9800
9801
9802
9803
9804
9805
9806
9807
9808
9809
9810
9811
9812
9813
9814
9815
9816
9817
9818
9819
9820
9821
9822
9823
9824
9825
9826
9827
9828
9829
9830
9831
9832
9833
9834
9835
9836
9837
9838
9839
9840
9841
9842
9843
9844
9845
9846
9847
9848
	lappend res [gets $chan]
	lappend res [gets $chan]
    return $res
} -cleanup {
    close $chan
} -match glob -result {hello AB 1 {error reading "*": invalid or incomplete multibyte or wide character}\
    1 {error reading "*": invalid or incomplete multibyte or wide character} 43 44 c0 40 EF GHI}

test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup {
    set res {}
    set fn [makeFile {} io-75.14]
    set f [open $fn w+]
    fconfigure $f -encoding binary
    # \xc0 is invalid in utf-8
    puts -nonewline $f a\nb\xc0\nc\n
    flush $f
    seek $f 0
    fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -profile strict
} -body {
    lappend res [gets $f]
    set status [catch {gets $f} cres copts]
    lappend res $status $cres
    chan configure $f -profile tcl8 
    lappend res [gets $f]
    lappend res [gets $f]
    close $f
    return $res
} -cleanup {
    removeFile io-75.14
} -match glob -result {a 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character} bÀ c}


test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup {
    set res {}
    set fn [makeFile {} io-75.15]
    set chan [open $fn w+]
    fconfigure $chan -encoding binary
    # This is not valid UTF-8
    puts $chan hello\nAB\xc0\x40CD\nEFG
    close $chan
} -body {
    #Now try to read it with [gets]
    set chan [open $fn]
    fconfigure $chan -encoding utf-8 -profile strict
    lappend res [gets $chan]
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres
    set status [catch {gets $chan} cres copts]
    lappend res $status $cres
	lappend res [dict get $copts -result]
	chan configure $chan -encoding binary
	foreach char [split [read $chan 2] {}] {
	    lappend res [format %x [scan $char %c]]
	}
    return $res
} -cleanup {
    close $chan
    removeFile io-75.15
} -match glob -result {hello 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character} 1 {error reading "file*":\
	invalid or incomplete multibyte or wide character} {read AB} 41 42}

# ### ### ### ######### ######### #########



test io-76.0 {channel modes} -setup {
    set datafile [makeFile {some characters} dummy]

Changes to tests/ioCmd.test.

1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
    note [fconfigure $c]
    close $c
    rename foo {}
    set res
} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}}
test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body {
    set res {}
    proc foo args {oninit cget cgetall; onfinal; track; return ""}
    set c [chan create {r w} foo]
    note [fconfigure $c]
    close $c
    rename foo {}
    set res
} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}}
test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body {







|







1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
    note [fconfigure $c]
    close $c
    rename foo {}
    set res
} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}}
test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body {
    set res {}
    proc foo args {oninit cget cgetall; onfinal; track; return {}}
    set c [chan create {r w} foo]
    note [fconfigure $c]
    close $c
    rename foo {}
    set res
} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}}
test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body {