Index: doc/FileSystem.3 ================================================================== --- doc/FileSystem.3 +++ doc/FileSystem.3 @@ -412,11 +412,11 @@ .PP \fBTcl_FSEvalFileEx\fR reads the file given by \fIpathPtr\fR using the encoding identified by \fIencodingName\fR and evaluates its contents as a Tcl script. It returns the same information as \fBTcl_EvalObjEx\fR. -If \fIencodingName\fR is NULL, the system encoding is used for +If \fIencodingName\fR is NULL, the utf-8 encoding is used for reading the file contents. If the file could not be read then a Tcl error is returned to describe why the file could not be read. The eofchar for files is .QW \e32 @@ -428,11 +428,11 @@ or .QW \eu001a , which will be safely substituted by the Tcl interpreter into .QW ^Z . \fBTcl_FSEvalFile\fR is a simpler version of -\fBTcl_FSEvalFileEx\fR that always uses the system encoding +\fBTcl_FSEvalFileEx\fR that always uses the utf-8 encoding when reading the file. .PP \fBTcl_FSLoadFile\fR dynamically loads a binary code file into memory and returns the addresses of two procedures within that file, if they are defined. The appropriate function for the filesystem to which \fIpathPtr\fR Index: doc/InitSubSyst.3 ================================================================== --- doc/InitSubSyst.3 +++ doc/InitSubSyst.3 @@ -21,11 +21,11 @@ library. This procedure is typically invoked as the very first thing in the application's main program. .PP \fBTcl_InitSubsystems\fR is very similar in use to \fBTcl_FindExecutable\fR. It can be used when Tcl is -used as utility library, no other encodings than utf8, +used as utility library, no other encodings than utf-8, iso8859-1 or unicode are used, and no interest exists in the value of \fBinfo nameofexecutable\fR. The system encoding will not -be extracted from the environment, but falls back to iso8859-1. +be extracted from the environment, but falls back to utf-8. .SH KEYWORDS binary, executable file Index: doc/source.n ================================================================== --- doc/source.n +++ doc/source.n @@ -45,11 +45,11 @@ .PP A leading BOM (Byte order mark) contained in the file is ignored for unicode encodings (utf-8, utf-16, ucs-2). .PP The \fB\-encoding\fR option is used to specify the encoding of the data stored in \fIfileName\fR. When the \fB\-encoding\fR option -is omitted, the system encoding is assumed. +is omitted, the utf-8 encoding is assumed. .SH EXAMPLE .PP Run the script in the file \fBfoo.tcl\fR and then the script in the file \fBbar.tcl\fR: .PP Index: generic/tclIOUtil.c ================================================================== --- generic/tclIOUtil.c +++ generic/tclIOUtil.c @@ -1682,11 +1682,11 @@ Tcl_Interp *interp, /* Interpreter that evaluates the script. */ Tcl_Obj *pathPtr, /* Pathname of the file to process. * Tilde-substitution is performed on this * pathname. */ const char *encodingName) /* Either the name of an encoding or NULL to - use the system encoding. */ + use the utf-8 encoding. */ { size_t length; int result = TCL_ERROR; Tcl_StatBuf statBuf; Tcl_Obj *oldScriptFile; @@ -1721,20 +1721,20 @@ Tcl_SetChannelOption(interp, chan, "-eofchar", "\32 {}"); /* * If the encoding is specified, set the channel to that encoding. - * Otherwise don't touch it, leaving things up to the system encoding. If - * the encoding is unknown report an error. + * Otherwise use utf-8. If the encoding is unknown report an error. */ - if (encodingName != NULL) { - if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) - != TCL_OK) { - Tcl_CloseEx(interp,chan,0); - return result; - } + if (encodingName == NULL) { + encodingName = "utf-8"; + } + if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) + != TCL_OK) { + Tcl_CloseEx(interp,chan,0); + return result; } TclNewObj(objPtr); Tcl_IncrRefCount(objPtr); @@ -1820,11 +1820,11 @@ Tcl_Interp *interp, /* Interpreter in which to evaluate the script. */ Tcl_Obj *pathPtr, /* Pathname of a file containing the script to * evaluate. Tilde-substitution is performed on * this pathname. */ const char *encodingName) /* The name of an encoding to use, or NULL to - * use the system encoding. */ + * use the utf-8 encoding. */ { Tcl_StatBuf statBuf; Tcl_Obj *oldScriptFile, *objPtr; Interp *iPtr; Tcl_Channel chan; @@ -1857,20 +1857,20 @@ Tcl_SetChannelOption(interp, chan, "-eofchar", "\32 {}"); /* * If the encoding is specified, set the channel to that encoding. - * Otherwise don't touch it, leaving things up to the system encoding. If - * the encoding is unknown report an error. + * Otherwise use utf-8. If the encoding is unknown report an error. */ - if (encodingName != NULL) { - if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) - != TCL_OK) { - Tcl_CloseEx(interp, chan, 0); - return TCL_ERROR; - } + if (encodingName == NULL) { + encodingName = "utf-8"; + } + if (Tcl_SetChannelOption(interp, chan, "-encoding", encodingName) + != TCL_OK) { + Tcl_CloseEx(interp, chan, 0); + return TCL_ERROR; } TclNewObj(objPtr); Tcl_IncrRefCount(objPtr); Index: library/auto.tcl ================================================================== --- library/auto.tcl +++ library/auto.tcl @@ -263,11 +263,11 @@ } } auto_mkindex_parser::cleanup set fid [open "tclIndex" w] - fconfigure $fid -translation lf + fconfigure $fid -encoding utf-8 -translation lf puts -nonewline $fid $index close $fid cd $oldDir } @@ -290,11 +290,11 @@ } foreach file [lsort [glob -- {*}$args]] { set f "" set error [catch { set f [open $file] - fconfigure $f -eofchar \032 + fconfigure $f -encoding utf-8 -eofchar \032 while {[gets $f line] >= 0} { if {[regexp {^proc[ ]+([^ ]*)} $line match procName]} { set procName [lindex [auto_qualify $procName "::"] 0] append index "set [list auto_index($procName)]" append index " \[list source \[file join \$dir [list $file]\]\]\n" @@ -309,11 +309,11 @@ } } set f "" set error [catch { set f [open tclIndex w] - fconfigure $f -translation lf + fconfigure $f -encoding utf-8 -translation lf puts -nonewline $f $index close $f cd $oldDir } msg opts] if {$error} { @@ -402,11 +402,11 @@ variable imports set scriptFile $file set fid [open $file] - fconfigure $fid -eofchar \032 + fconfigure $fid -encoding utf-8 -eofchar \032 set contents [read $fid] close $fid # There is one problem with sourcing files into the safe interpreter: # references like "$x" will fail since code is not really being executed Index: library/clock.tcl ================================================================== --- library/clock.tcl +++ library/clock.tcl @@ -3312,11 +3312,11 @@ return -code error \ -errorcode [list CLOCK badTimeZone $:fileName] \ "time zone \":$fileName\" not valid" } try { - source -encoding utf-8 [file join $DataDir $fileName] + source [file join $DataDir $fileName] } on error {} { return -code error \ -errorcode [list CLOCK badTimeZone :$fileName] \ "time zone \":$fileName\" not found" } Index: library/init.tcl ================================================================== --- library/init.tcl +++ library/init.tcl @@ -110,11 +110,11 @@ # Auto-loading stubs for 'clock.tcl' foreach cmd {add format scan} { proc ::tcl::clock::$cmd args { variable TclLibDir - source -encoding utf-8 [file join $TclLibDir clock.tcl] + source [file join $TclLibDir clock.tcl] return [uplevel 1 [info level 0]] } } rename ::tcl::initClock {} Index: library/install.tcl ================================================================== --- library/install.tcl +++ library/install.tcl @@ -33,11 +33,11 @@ set version [lindex [split $fname -] 1] ### # Read the file, and override assumptions as needed ### set fin [open $file r] - fconfigure $fin -eofchar \032 + fconfigure $fin -encoding utf-8 -eofchar \032 set dat [read $fin] close $fin # Look for a teapot style Package statement foreach line [split $dat \n] { set line [string trim $line] @@ -57,11 +57,11 @@ append buffer "package ifneeded $package $version \[list source \[file join \$dir [file tail $file]\]\]" \n } foreach file [glob -nocomplain $path/*.tcl] { if { [file tail $file] == "version_info.tcl" } continue set fin [open $file r] - fconfigure $fin -eofchar \032 + fconfigure $fin -encoding utf-8 -eofchar \032 set dat [read $fin] close $fin if {![regexp "package provide" $dat]} continue set fname [file rootname [file tail $file]] # Look for a package provide statement @@ -77,11 +77,11 @@ } } return $buffer } set fin [open $pkgidxfile r] - fconfigure $fin -eofchar \032 + fconfigure $fin -encoding utf-8 -eofchar \032 set dat [read $fin] close $fin set trace 0 #if {[file tail $path] eq "tool"} { # set trace 1 Index: library/package.tcl ================================================================== --- library/package.tcl +++ library/package.tcl @@ -407,11 +407,11 @@ } append index "\n[eval $cmd]" } set f [open [file join $dir pkgIndex.tcl] w] - fconfigure $f -translation lf + fconfigure $f -encoding utf-8 -translation lf puts $f $index close $f } # tclPkgSetup -- Index: library/safe.tcl ================================================================== --- library/safe.tcl +++ library/safe.tcl @@ -935,11 +935,11 @@ Log $child "attempt to use the identity encoding" return -code error "permission denied" } } else { set at 0 - set encoding {} + set encoding utf-8 } if {$argc != 1} { set msg "wrong # args: should be \"source ?-encoding E? fileName\"" Log $child "$msg ($args)" return -code error $msg @@ -978,14 +978,11 @@ # doesn't leak so much. [Bug 2913625] set old [::interp eval $child {info script}] set replacementMsg "script error" set code [catch { set f [open $realfile] - fconfigure $f -eofchar \032 - if {$encoding ne ""} { - fconfigure $f -encoding $encoding - } + fconfigure $f -encoding $encoding -eofchar \032 set contents [read $f] close $f ::interp eval $child [list info script $file] } msg opt] if {$code == 0} { Index: library/tm.tcl ================================================================== --- library/tm.tcl +++ library/tm.tcl @@ -265,11 +265,11 @@ # # Right now LOCATE's implementation assumes that the path # of the package file is the last element in the list. package ifneeded $pkgname $pkgversion \ - "[::list package provide $pkgname $pkgversion];[::list source -encoding utf-8 $file]" + "[::list package provide $pkgname $pkgversion];[::list source $file]" # We abort in this unknown handler only if we got a # satisfying candidate for the requested package. # Otherwise we still have to fallback to the regular # package search to complete the processing. Index: tests/source.test ================================================================== --- tests/source.test +++ tests/source.test @@ -112,11 +112,11 @@ set out [open $sourcefile w] fconfigure $out -encoding utf-8 puts $out "\ufeffset y new-y" close $out set y old-y - source -encoding utf-8 $sourcefile + source $sourcefile return $y } -cleanup { removeFile $sourcefile } -result {new-y} @@ -224,11 +224,11 @@ fconfigure $f -encoding utf-8 puts $f "set symbol(square-root) \u221A; set x correct" close $f } -body { set x unset - source -encoding utf-8 $sourcefile + source $sourcefile set x } -cleanup { removeFile source.file } -result correct test source-7.2 {source -encoding test} -setup { @@ -267,11 +267,11 @@ set f [open $sourcefile w] fconfigure $f -encoding utf-8 puts $f "proc \u20ac {} {return foo}" close $f } -body { - source -encoding utf-8 $sourcefile + source $sourcefile \u20ac } -cleanup { removeFile source.file rename \u20ac {} } -result foo Index: unix/README ================================================================== --- unix/README +++ unix/README @@ -89,11 +89,11 @@ --enable-dtrace Enable tcl DTrace provider (if DTrace is available on the platform), c.f. tclDTrace.d for descriptions of the probes made available, see http://wiki.tcl.tk/DTrace for more details --with-encoding=ENCODING Specifies the encoding for compile-time - configuration values. Defaults to iso8859-1, + configuration values. Defaults to utf-8, which is also sufficient for ASCII. --with-tzdata=FLAG Specifies whether to install timezone data. By default, the configure script tries to detect whether a usable timezone database is present on the system already. Index: unix/configure ================================================================== --- unix/configure +++ unix/configure @@ -1428,11 +1428,11 @@ Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-encoding encoding for configuration values (default: - iso8859-1) + utf-8) --with-system-libtommath use external libtommath (default: true if available, false otherwise) --with-tzdata install timezone data (default: autodetect) @@ -3980,11 +3980,11 @@ #define TCL_CFGVAL_ENCODING "${with_tcencoding}" _ACEOF else -$as_echo "#define TCL_CFGVAL_ENCODING \"iso8859-1\"" >>confdefs.h +$as_echo "#define TCL_CFGVAL_ENCODING \"utf-8\"" >>confdefs.h fi #-------------------------------------------------------------------- @@ -9736,13 +9736,10 @@ fi $as_echo "#define USE_VFORK 1" >>confdefs.h - -$as_echo "#define TCL_DEFAULT_ENCODING \"utf-8\"" >>confdefs.h - $as_echo "#define TCL_LOAD_FROM_MEMORY 1" >>confdefs.h $as_echo "#define TCL_WIDE_CLICKS 1" >>confdefs.h Index: unix/configure.ac ================================================================== --- unix/configure.ac +++ unix/configure.ac @@ -604,12 +604,10 @@ if test $tcl_corefoundation = yes; then AC_CHECK_HEADERS(libkern/OSAtomic.h) AC_CHECK_FUNCS(OSSpinLockLock) fi AC_DEFINE(USE_VFORK, 1, [Should we use vfork() instead of fork()?]) - AC_DEFINE(TCL_DEFAULT_ENCODING, "utf-8", - [Are we to override what our default encoding is?]) AC_DEFINE(TCL_LOAD_FROM_MEMORY, 1, [Can this platform load code from memory?]) AC_DEFINE(TCL_WIDE_CLICKS, 1, [Does this platform have wide high-resolution clicks?]) AC_CHECK_HEADERS(AvailabilityMacros.h) Index: unix/tcl.m4 ================================================================== --- unix/tcl.m4 +++ unix/tcl.m4 @@ -2443,18 +2443,18 @@ #-------------------------------------------------------------------- AC_DEFUN([SC_TCL_CFG_ENCODING], [ AC_ARG_WITH(encoding, AC_HELP_STRING([--with-encoding], - [encoding for configuration values (default: iso8859-1)]), + [encoding for configuration values (default: utf-8)]), with_tcencoding=${withval}) if test x"${with_tcencoding}" != x ; then AC_DEFINE_UNQUOTED(TCL_CFGVAL_ENCODING,"${with_tcencoding}", [What encoding should be used for embedded configuration info?]) else - AC_DEFINE(TCL_CFGVAL_ENCODING,"iso8859-1", + AC_DEFINE(TCL_CFGVAL_ENCODING,"utf-8", [What encoding should be used for embedded configuration info?]) fi ]) #-------------------------------------------------------------------- Index: unix/tclConfig.h.in ================================================================== --- unix/tclConfig.h.in +++ unix/tclConfig.h.in @@ -413,13 +413,10 @@ #undef TCL_COMPILE_DEBUG /* Are bytecode statistics enabled? */ #undef TCL_COMPILE_STATS -/* Are we to override what our default encoding is? */ -#undef TCL_DEFAULT_ENCODING - /* Is Tcl built as a framework? */ #undef TCL_FRAMEWORK /* Can this platform load code from memory? */ #undef TCL_LOAD_FROM_MEMORY Index: unix/tclUnixInit.c ================================================================== --- unix/tclUnixInit.c +++ unix/tclUnixInit.c @@ -90,11 +90,11 @@ * on the platform. However, there is always a final fallback, and this value * is it. Make sure it is a real Tcl encoding. */ #ifndef TCL_DEFAULT_ENCODING -#define TCL_DEFAULT_ENCODING "iso8859-1" +#define TCL_DEFAULT_ENCODING "utf-8" #endif /* * Default directory in which to look for Tcl library scripts. The symbol is * defined by Makefile. Index: win/configure ================================================================== --- win/configure +++ win/configure @@ -3745,12 +3745,11 @@ cat >>confdefs.h <<_ACEOF #define TCL_CFGVAL_ENCODING "${with_tcencoding}" _ACEOF else - # Default encoding on windows is not "iso8859-1" - $as_echo "#define TCL_CFGVAL_ENCODING \"cp1252\"" >>confdefs.h + $as_echo "#define TCL_CFGVAL_ENCODING \"utf-8\"" >>confdefs.h fi #-------------------------------------------------------------------- Index: win/tcl.m4 ================================================================== --- win/tcl.m4 +++ win/tcl.m4 @@ -1083,12 +1083,11 @@ AC_ARG_WITH(encoding, [ --with-encoding encoding for configuration values], with_tcencoding=${withval}) if test x"${with_tcencoding}" != x ; then AC_DEFINE_UNQUOTED(TCL_CFGVAL_ENCODING,"${with_tcencoding}") else - # Default encoding on windows is not "iso8859-1" - AC_DEFINE(TCL_CFGVAL_ENCODING,"cp1252") + AC_DEFINE(TCL_CFGVAL_ENCODING,"utf-8") fi ]) #-------------------------------------------------------------------- # SC_EMBED_MANIFEST