Index: doc/FileSystem.3 ================================================================== --- doc/FileSystem.3 +++ doc/FileSystem.3 @@ -200,10 +200,13 @@ .AP int recursive in Whether to remove subdirectories and their contents as well. .AP "const char" *encodingName in The encoding of the data stored in the file identified by \fIpathPtr\fR and to be evaluated. +Can also be set to \fBTCL_ENCODING_UTF8_STRICT\fR, +\fBTCL_ENCODING_UTF8_REPLACE\fR or \fBTCL_ENCODING_UTF8_TCL8\fR, +which selects `utf-8` in combination with one of the 3 possible profiles. .AP "const char" *pattern in Only files or directories matching this pattern will be returned. .AP Tcl_GlobTypeData *types in Only files or directories matching the type descriptions contained in this structure will be returned. This parameter may be NULL. @@ -421,15 +424,18 @@ function and asks them to return their list of root volumes. It accumulates the return values in a list which is returned to the caller (with a reference count of 0). .PP \fBTcl_FSEvalFileEx\fR reads the file given by \fIpathPtr\fR using -the encoding identified by \fIencodingName\fR and evaluates +the encoding/profile identified by \fIencodingName\fR and evaluates its contents as a Tcl script. It returns the same information as \fBTcl_EvalObjEx\fR. -If \fIencodingName\fR is NULL, the utf-8 encoding is used for -reading the file contents. +If \fIencodingName\fR is NULL, the utf-8 encoding and the +strict profile is used for reading the file contents. +If \fIencodingName\fR is set to \fBTCL_ENCODING_UTF8_STRICT\fR, +\fBTCL_ENCODING_UTF8_REPLACE\fR or \fBTCL_ENCODING_UTF8_TCL8\fR, the +profile is set to the given value, the encoding is utf-8. If the file could not be read then a Tcl error is returned to describe why the file could not be read. The eofchar for files is .QW \ex1A (^Z) for all platforms. Index: doc/Tcl_Main.3 ================================================================== --- doc/Tcl_Main.3 +++ doc/Tcl_Main.3 @@ -43,11 +43,13 @@ Address of an application-specific initialization procedure. The value for this argument is usually \fBTcl_AppInit\fR. .AP Tcl_Obj *path in Name of file to use as startup script, or NULL. .AP "const char" *encoding in -Encoding of file to use as startup script, or NULL. +Encoding of file to use as startup script, or NULL, or +\fBTCL_ENCODING_UTF8_STRICT\fR, \fBTCL_ENCODING_UTF8_REPLACE\fR +or \fBTCL_ENCODING_UTF8_TCL8\fR. .AP "const char" **encodingPtr out If non-NULL, location to write a copy of the (const char *) pointing to the encoding name. .AP Tcl_MainLoopProc *mainLoopProc in Address of an application-specific event loop procedure. Index: doc/open.n ================================================================== --- doc/open.n +++ doc/open.n @@ -58,11 +58,12 @@ \fBfconfigure\fR \fB\-translation binary\fR option, making the channel suitable for reading or writing of binary data. .PP In the second form, \fIaccess\fR consists of a list of any of the following flags, most of which have the standard POSIX meanings. -One of the flags must be either \fBRDONLY\fR, \fBWRONLY\fR or \fBRDWR\fR. +If none of \fBRDONLY\fR, \fBWRONLY\fR or \fBRDWR\fR are specified, +\fBRDONLY\fR is the default. .IP \fBRDONLY\fR Open the file for reading only. .IP \fBWRONLY\fR Open the file for writing only. .IP \fBRDWR\fR Index: doc/source.n ================================================================== --- doc/source.n +++ doc/source.n @@ -45,10 +45,13 @@ unicode encodings (utf-8, utf-16, ucs-2). .PP The \fB\-encoding\fR option is used to specify the encoding of the data stored in \fIfileName\fR. When the \fB\-encoding\fR option is omitted, the utf-8 encoding is assumed. +.PP +The \fB\-profile\fR option is used to specify the profile. Can be +"tcl8", "replace" or "strict". .SH EXAMPLE .PP Run the script in the file \fBfoo.tcl\fR and then the script in the file \fBbar.tcl\fR: .PP Index: generic/tcl.h ================================================================== --- generic/tcl.h +++ generic/tcl.h @@ -2028,10 +2028,14 @@ * necessary. */ #define TCL_ENCODING_PROFILE_STRICT TCL_ENCODING_STOPONERROR #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_REPLACE 0x02000000 + +#define TCL_ENCODING_UTF8_STRICT ((const char *)-1) +#define TCL_ENCODING_UTF8_REPLACE ((const char *)-2) +#define TCL_ENCODING_UTF8_TCL8 ((const char *)-3) /* * The following definitions are the error codes returned by the conversion * routines: * Index: generic/tclCmdMZ.c ================================================================== --- generic/tclCmdMZ.c +++ generic/tclCmdMZ.c @@ -1108,27 +1108,42 @@ int result; void **pkgFiles = NULL; void *names = NULL; if (objc < 2 || objc > 4) { - Tcl_WrongNumArgs(interp, 1, objv, "?-encoding encoding? fileName"); + Tcl_WrongNumArgs(interp, 1, objv, "?-encoding encoding|-profile profile? fileName"); return TCL_ERROR; } fileName = objv[objc-1]; if (objc == 4) { static const char *const options[] = { - "-encoding", NULL + "-encoding", "-profile", NULL }; int index; if (TCL_ERROR == Tcl_GetIndexFromObj(interp, objv[1], options, "option", TCL_EXACT, &index)) { return TCL_ERROR; } + if (index) { + int id; + result = TclEncodingProfileNameToId(interp, TclGetString(objv[2]), &id); + if (result != TCL_OK) { + return TCL_ERROR; + } + if (id == TCL_ENCODING_PROFILE_TCL8) { + encodingName = TCL_ENCODING_UTF8_TCL8; + } else if (id == TCL_ENCODING_PROFILE_REPLACE) { + encodingName = TCL_ENCODING_UTF8_REPLACE; + } else { + encodingName = NULL; + } + } else { encodingName = TclGetString(objv[2]); + } } else if (objc == 3) { /* Handle undocumented -nopkg option. This should only be * used by the internal ::tcl::Pkg::source utility function. */ static const char *const nopkgoptions[] = { "-nopkg", NULL Index: generic/tclIOCmd.c ================================================================== --- generic/tclIOCmd.c +++ generic/tclIOCmd.c @@ -1161,12 +1161,18 @@ default: Tcl_Panic("Tcl_OpenCmd: invalid mode value"); break; } chan = Tcl_OpenCommandChannel(interp, cmdObjc, cmdArgv, flags); - if ((modeFlags & CHANNEL_RAW_MODE) && chan) { - Tcl_SetChannelOption(interp, chan, "-translation", "binary"); + if (chan) { + if (modeFlags & CHANNEL_RAW_MODE) { + Tcl_SetChannelOption(interp, chan, "-translation", "binary"); + } else if (ENCODING_PROFILE_GET(modeFlags) == TCL_ENCODING_PROFILE_TCL8) { + Tcl_SetChannelOption(interp, chan, "-profile", "tcl8"); + } else if (ENCODING_PROFILE_GET(modeFlags) == TCL_ENCODING_PROFILE_REPLACE) { + Tcl_SetChannelOption(interp, chan, "-profile", "replace"); + } } } Tcl_Free((void *)cmdArgv); } if (chan == NULL) { Index: generic/tclIOUtil.c ================================================================== --- generic/tclIOUtil.c +++ generic/tclIOUtil.c @@ -1423,15 +1423,17 @@ * * Results: * The mode to pass to "open", or -1 if an error occurs. * * Side effects: - * Sets *modeFlagsPtr to 1 to tell the caller to - * seek to EOF after opening the file, or to 0 otherwise. + * Sets *modeFlagsPtr to the expected profile. 0 is the default. * - * Adds CHANNEL_RAW_MODE to *modeFlagsPtr to tell the caller - * to configure the channel as a binary channel. + * Adds 1 to *modeFlagsPtr to tell the caller to seek to EOF + * after opening the file. + * + * Adds 2 to *modeFlagsPtr to tell the caller to configure the + * channel as a binary channel. * * If there is an error and interp is not NULL, sets * interpreter result to an error message. * * Special note: @@ -1623,35 +1625,47 @@ if (mode & O_TRUNC) { goto accessFlagRepeated; } mode |= O_TRUNC; } else if ((c == 'B') && (strcmp(flag, "BINARY") == 0)) { - if (*modeFlagsPtr & CHANNEL_RAW_MODE) { - goto accessFlagRepeated; + if (*modeFlagsPtr & (ENCODING_PROFILE_MASK|CHANNEL_RAW_MODE)) { + goto invAccess; } *modeFlagsPtr |= CHANNEL_RAW_MODE; + } else if ((c == 'T') && (strcmp(flag, "TCL8") == 0)) { + if (*modeFlagsPtr & (ENCODING_PROFILE_MASK|CHANNEL_RAW_MODE)) { + invAccess: + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid access mode \"%s\": modes BINARY, " + "REPLACE, STRICT, and TCL8 cannot be combined", flag)); + } + goto invAccessMode; + } + *modeFlagsPtr |= TCL_ENCODING_PROFILE_TCL8; + } else if ((c == 'S') && (strcmp(flag, "STRICT") == 0)) { + if (*modeFlagsPtr & (ENCODING_PROFILE_MASK|CHANNEL_RAW_MODE)) { + goto invAccess; + } + *modeFlagsPtr |= TCL_ENCODING_PROFILE_STRICT; + } else if ((c == 'R') && (strcmp(flag, "REPLACE") == 0)) { + if (*modeFlagsPtr & (ENCODING_PROFILE_MASK|CHANNEL_RAW_MODE)) { + goto invAccess; + } + *modeFlagsPtr |= TCL_ENCODING_PROFILE_REPLACE; } else { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "invalid access mode \"%s\": must be APPEND, BINARY, " "CREAT, EXCL, NOCTTY, NONBLOCK, RDONLY, RDWR, " - "TRUNC, or WRONLY", flag)); + "REPLACE, STRICT, TCL8, TRUNC, or WRONLY", flag)); } goto invAccessMode; } } Tcl_Free((void *)modeArgv); - - if (!gotRW) { - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "access mode must include either RDONLY, RDWR, or WRONLY", - -1)); - } - return -1; - } return mode; } /* *---------------------------------------------------------------------- @@ -1691,11 +1705,13 @@ Tcl_Interp *interp, /* Interpreter that evaluates the script. */ Tcl_Obj *pathPtr, /* Pathname of the file to process. * Tilde-substitution is performed on this * pathname. */ const char *encodingName) /* Either the name of an encoding or NULL to - use the utf-8 encoding. */ + use the utf-8 encoding. May also be TCL_ENCODING_UTF8_STRICT, + TCL_ENCODING_UTF8_REPLACE, or TCL_ENCODING_UTF8_TCL8, + for specifying the profile. */ { Tcl_Size length; int result = TCL_ERROR; Tcl_StatBuf statBuf; Tcl_Obj *oldScriptFile; @@ -1733,11 +1749,18 @@ /* * If the encoding is specified, set the channel to that encoding. * Otherwise use utf-8. If the encoding is unknown report an error. */ - if (encodingName == NULL) { + if (encodingName == NULL || encodingName == TCL_ENCODING_UTF8_STRICT) { + goto utf8; + } else if (encodingName == TCL_ENCODING_UTF8_REPLACE) { + Tcl_SetChannelOption(interp, chan, "-profile", "replace"); + goto utf8; + } else if (encodingName == TCL_ENCODING_UTF8_TCL8) { + Tcl_SetChannelOption(interp, chan, "-profile", "tcl8"); + utf8: encodingName = "utf-8"; } if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) != TCL_OK) { Tcl_CloseEx(interp,chan,0); @@ -1869,11 +1892,18 @@ /* * If the encoding is specified, set the channel to that encoding. * Otherwise use utf-8. If the encoding is unknown report an error. */ - if (encodingName == NULL) { + if (encodingName == NULL || encodingName == TCL_ENCODING_UTF8_STRICT) { + goto utf8; + } else if (encodingName == TCL_ENCODING_UTF8_REPLACE) { + Tcl_SetChannelOption(interp, chan, "-profile", "replace"); + goto utf8; + } else if (encodingName == TCL_ENCODING_UTF8_TCL8) { + Tcl_SetChannelOption(interp, chan, "-profile", "tcl8"); + utf8: encodingName = "utf-8"; } if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) != TCL_OK) { Tcl_CloseEx(interp, chan, 0); @@ -2246,10 +2276,14 @@ Tcl_CloseEx(NULL, retVal, 0); return NULL; } if (modeFlags & CHANNEL_RAW_MODE) { Tcl_SetChannelOption(interp, retVal, "-translation", "binary"); + } else if (ENCODING_PROFILE_GET(modeFlags) == TCL_ENCODING_PROFILE_TCL8) { + Tcl_SetChannelOption(interp, retVal, "-profile", "tcl8"); + } else if (ENCODING_PROFILE_GET(modeFlags) == TCL_ENCODING_PROFILE_REPLACE) { + Tcl_SetChannelOption(interp, retVal, "-profile", "replace"); } return retVal; } /* Index: generic/tclInt.h ================================================================== --- generic/tclInt.h +++ generic/tclInt.h @@ -3346,11 +3346,11 @@ MODULE_SCOPE Tcl_Command TclNRCreateCommandInNs(Tcl_Interp *interp, const char *cmdName, Tcl_Namespace *nsPtr, Tcl_ObjCmdProc *proc, Tcl_ObjCmdProc *nreProc, void *clientData, Tcl_CmdDeleteProc *deleteProc); MODULE_SCOPE int TclNREvalFile(Tcl_Interp *interp, Tcl_Obj *pathPtr, - const char *encodingName); + const char *encoding); MODULE_SCOPE int * TclGetAsyncReadyPtr(void); MODULE_SCOPE Tcl_Obj * TclGetBgErrorHandler(Tcl_Interp *interp); MODULE_SCOPE int TclGetChannelFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Channel *chanPtr, int *modePtr, int flags); Index: generic/tclMain.c ================================================================== --- generic/tclMain.c +++ generic/tclMain.c @@ -72,11 +72,11 @@ typedef struct { Tcl_Obj *path; /* The filename of the script for *_Main() * routines to [source] as a startup script, * or NULL for none set, meaning enter * interactive mode. */ - Tcl_Obj *encoding; /* The encoding of the startup script file. */ + Tcl_Obj *encoding; /* The encoding or profile of the startup script file. */ Tcl_MainLoopProc *mainLoopProc; /* Any installed main loop handler. The main * extension that installs these is Tk. */ } ThreadSpecificData; @@ -131,21 +131,26 @@ * Side effects: * *---------------------------------------------------------------------- */ +#define IS_ENCODING(encoding) ((encoding) && (((encoding) < TCL_ENCODING_UTF8_TCL8) \ + || ((encoding) > TCL_ENCODING_UTF8_STRICT))) + void Tcl_SetStartupScript( Tcl_Obj *path, /* Filesystem path of startup script file */ - const char *encodingName) /* Encoding of the data in that file */ + const char *encoding) /* Encoding of the data in that file */ { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - Tcl_Obj *encodingObj = NULL; + Tcl_Obj *encodingObj; - if (encodingName != NULL) { - encodingObj = Tcl_NewStringObj(encodingName, -1); + if (IS_ENCODING(encoding)) { + encodingObj = Tcl_NewStringObj(encoding, -1); Tcl_IncrRefCount(encodingObj); + } else { + encodingObj = (Tcl_Obj *)encoding; } if (path != NULL) { Tcl_IncrRefCount(path); } @@ -152,11 +157,11 @@ if (tsdPtr->path != NULL) { Tcl_DecrRefCount(tsdPtr->path); } tsdPtr->path = path; - if (tsdPtr->encoding != NULL) { + if (IS_ENCODING((const char *)tsdPtr->encoding)) { Tcl_DecrRefCount(tsdPtr->encoding); } tsdPtr->encoding = encodingObj; } @@ -170,32 +175,31 @@ * * Results: * The path of the startup script; NULL if none has been set. * * Side effects: - * If encodingPtr is not NULL, stores a (const char *) in it pointing to - * the encoding name registered for the startup script. Tcl retains - * ownership of the string, and may free it. Caller should make a copy - * for long-term use. + * If encodingPtr is not NULL, stores a (const char *) in it pointing + * to the encoding name or profile registered for the startup script. + * Tcl retains ownership of the string, and may free it. Caller + * should make a copy for long-term use. * *---------------------------------------------------------------------- */ Tcl_Obj * Tcl_GetStartupScript( - const char **encodingPtr) /* When not NULL, points to storage for the - * (const char *) that points to the - * registered encoding name for the startup - * script. */ + const char **encodingPtr) /* When not NULL or TCL_ENCODING_UTF8_????, + * points to storage for the (const char *) that points to + * the registered encoding name for the startup script. */ { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); if (encodingPtr != NULL) { - if (tsdPtr->encoding != NULL) { + if (IS_ENCODING((const char *)tsdPtr->encoding)) { *encodingPtr = Tcl_GetString(tsdPtr->encoding); } else { - *encodingPtr = NULL; + *encodingPtr = (const char *)tsdPtr->encoding; } } return tsdPtr->path; } @@ -325,10 +329,21 @@ && ('-' != argv[3][0])) { Tcl_Obj *value = NewNativeObj(argv[2]); Tcl_SetStartupScript(NewNativeObj(argv[3]), Tcl_GetString(value)); Tcl_DecrRefCount(value); + argc -= 3; + i += 3; + } else if ((argc >= 3) && (0 == _tcscmp(TEXT("-profile"), argv[1])) + && ('-' != argv[3][0])) { + if (0 == _tcscmp(TEXT("tcl8"), argv[2])) { + Tcl_SetStartupScript(NewNativeObj(argv[3]), TCL_ENCODING_UTF8_TCL8); + } else if (0 == _tcscmp(TEXT("replace"), argv[2])) { + Tcl_SetStartupScript(NewNativeObj(argv[3]), TCL_ENCODING_UTF8_REPLACE); + } else { + Tcl_SetStartupScript(NewNativeObj(argv[3]), NULL); + } argc -= 3; i += 3; } else if ((argc >= 1) && ('-' != argv[1][0])) { Tcl_SetStartupScript(NewNativeObj(argv[1]), NULL); argc--; Index: tests/ioCmd.test ================================================================== --- tests/ioCmd.test +++ tests/ioCmd.test @@ -474,18 +474,18 @@ while processing open access modes \"FOO {BAR BAZ\" invoked from within \"open \$path(test3) \"FOO \\{BAR BAZ\"\"" test iocmd-12.7 {POSIX open access modes: errors} { list [catch {open $path(test3) {FOO BAR BAZ}} msg] $msg -} {1 {invalid access mode "FOO": must be APPEND, BINARY, CREAT, EXCL, NOCTTY, NONBLOCK, RDONLY, RDWR, TRUNC, or WRONLY}} +} {1 {invalid access mode "FOO": must be APPEND, BINARY, CREAT, EXCL, NOCTTY, NONBLOCK, RDONLY, RDWR, REPLACE, STRICT, TCL8, TRUNC, or WRONLY}} test iocmd-12.8 {POSIX open access modes: errors} { - list [catch {open $path(test3) {TRUNC CREAT}} msg] $msg -} {1 {access mode must include either RDONLY, RDWR, or WRONLY}} + list [catch {open $path(test3) {BINARY STRICT}} msg] $msg +} {1 {invalid access mode "STRICT": modes BINARY, REPLACE, STRICT, and TCL8 cannot be combined}} close [open $path(test3) w] test iocmd-12.9 {POSIX open access modes: BINARY} { - list [catch {open $path(test1) BINARY} msg] $msg -} {1 {access mode must include either RDONLY, RDWR, or WRONLY}} + list [catch {open $path(test1) {RDWR WRONLY}} msg] $msg +} {1 {invalid access mode "WRONLY": modes RDONLY, RDWR, and WRONLY cannot be combined}} test iocmd-12.10 {POSIX open access modes: BINARY} { set f [open $path(test1) {WRONLY BINARY TRUNC}] puts $f a puts $f b puts -nonewline $f c ;# contents are now 5 bytes: a\nb\nc @@ -516,14 +516,14 @@ test iocmd-12.12 {POSIX open access modes: errors} { list [catch {open $path(test3) {RDWR WRONLY}} msg] $msg } {1 {invalid access mode "WRONLY": modes RDONLY, RDWR, and WRONLY cannot be combined}} test iocmd-12.13 {POSIX open access modes: errors} { list [catch {open $path(test3) {BINARY BINARY}} msg] $msg -} {1 {access mode "BINARY" repeated}} +} {1 {invalid access mode "BINARY": modes BINARY, REPLACE, STRICT, and TCL8 cannot be combined}} test iocmd-12.14 {POSIX open access modes: errors} { - list [catch {open $path(test3) {TRUNC}} msg] $msg -} {1 {access mode must include either RDONLY, RDWR, or WRONLY}} + list [catch {open $path(test3) {TRUNC TRUNC}} msg] $msg +} {1 {access mode "TRUNC" repeated}} test iocmd-13.1 {errors in open command} { list [catch {open} msg] $msg } {1 {wrong # args: should be "open fileName ?access? ?permissions?"}} test iocmd-13.2 {errors in open command} {