Tcl Source Code

Check-in [9eb437a15d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:re-implemente changes in win/tclWinFile.c (handling -DTCL_UTF_MAX=6) using 3 new utility functions. This allows to re-use code in more places: cleaner implementation more future-proof.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | core-8-branch
Files: files | file ages | folders
SHA3-256: 9eb437a15d351b260dca9b40a7fed7140168408f46844f9b0ff4cef1e85895d1
User & Date: jan.nijtmans 2019-03-10 20:18:48.872
Context
2019-03-11
20:25
Fix [590e687905]: utf-1.11 test-case fails check-in: 07c460f447 user: jan.nijtmans tags: core-8-branch
2019-03-10
21:04
Merge 8.7 check-in: 316ceb7616 user: jan.nijtmans tags: trunk
20:18
re-implemente changes in win/tclWinFile.c (handling -DTCL_UTF_MAX=6) using 3 new utility functions. ... check-in: 9eb437a15d user: jan.nijtmans tags: core-8-branch
2019-03-08
21:27
Make -DTCL_UTF_MAX=6-build work on win32. Add travis builds to prove it. check-in: 1df1b22fad user: jan.nijtmans tags: core-8-branch
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/regcomp.c.
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    const chr *stop;		/* end of string */
    const chr *savenow;		/* saved now and stop for "subroutine call" */
    const chr *savestop;
    int err;			/* error code (0 if none) */
    int cflags;			/* copy of compile flags */
    int lasttype;		/* type of previous token */
    int nexttype;		/* type of next token */
    chr nextvalue;		/* value (if any) of next token */
    int lexcon;			/* lexical context type (see lex.c) */
    int nsubexp;		/* subexpression count */
    struct subre **subs;	/* subRE pointer vector */
    int nsubs;			/* length of vector */
    struct subre *sub10[10];	/* initial vector, enough for most */
    struct nfa *nfa;		/* the NFA */
    struct colormap *cm;	/* character color map */







|







202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    const chr *stop;		/* end of string */
    const chr *savenow;		/* saved now and stop for "subroutine call" */
    const chr *savestop;
    int err;			/* error code (0 if none) */
    int cflags;			/* copy of compile flags */
    int lasttype;		/* type of previous token */
    int nexttype;		/* type of next token */
    int nextvalue;		/* value (if any) of next token */
    int lexcon;			/* lexical context type (see lex.c) */
    int nsubexp;		/* subexpression count */
    struct subre **subs;	/* subRE pointer vector */
    int nsubs;			/* length of vector */
    struct subre *sub10[10];	/* initial vector, enough for most */
    struct nfa *nfa;		/* the NFA */
    struct colormap *cm;	/* character color map */
Changes to generic/tclInt.h.
3237
3238
3239
3240
3241
3242
3243











3244
3245
3246
3247
3248
3249
3250
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command);
MODULE_SCOPE void	TclRegisterCommandTypeName(
			    Tcl_ObjCmdProc *implementationProc,
			    const char *nameStr);











MODULE_SCOPE int	TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCount(int ch);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,







>
>
>
>
>
>
>
>
>
>
>







3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
			    const char *trim, int numTrim);
MODULE_SCOPE int	TclTrimRight(const char *bytes, int numBytes,
			    const char *trim, int numTrim);
MODULE_SCOPE const char*TclGetCommandTypeName(Tcl_Command command);
MODULE_SCOPE void	TclRegisterCommandTypeName(
			    Tcl_ObjCmdProc *implementationProc,
			    const char *nameStr);
#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
MODULE_SCOPE int TclUtfToWChar(const char *src, WCHAR *chPtr);
MODULE_SCOPE char *	TclWCharToUtfDString(const WCHAR *uniStr,
			    int uniLength, Tcl_DString *dsPtr);
MODULE_SCOPE WCHAR * TclUtfToWCharDString(const char *src,
			    int length, Tcl_DString *dsPtr);
#else
#   define TclUtfToWChar TclUtfToUniChar
#   define TclWCharToUtfDString Tcl_UniCharToUtfDString
#   define TclUtfToWCharDString Tcl_UtfToUniCharDString
#endif
MODULE_SCOPE int	TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCount(int ch);
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,
Changes to generic/tclStubInit.c.
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364

char *
Tcl_WinUtfToTChar(
    const char *string,
    int len,
    Tcl_DString *dsPtr)
{
#if TCL_UTF_MAX > 4
    Tcl_UniChar ch = 0;
    wchar_t *w, *wString;
    const char *p, *end;
    int oldLength;
#endif

    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
#if TCL_UTF_MAX > 4

    if (len < 0) {
	len = strlen(string);
    }

    /*
     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
     * bytes.
     */

    oldLength = Tcl_DStringLength(dsPtr);

    Tcl_DStringSetLength(dsPtr,
	    oldLength + (int) ((len + 1) * sizeof(wchar_t)));
    wString = (wchar_t *) (Tcl_DStringValue(dsPtr) + oldLength);

    w = wString;
    p = string;
    end = string + len - 4;
    while (p < end) {
	p += TclUtfToUniChar(p, &ch);
	if (ch > 0xFFFF) {
	    *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
	    *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
	} else {
	    *w++ = ch;
	}
    }
    end += 4;
    while (p < end) {
	if (Tcl_UtfCharComplete(p, end-p)) {
	    p += TclUtfToUniChar(p, &ch);
	} else {
	    ch = UCHAR(*p++);
	}
	if (ch > 0xFFFF) {
	    *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
	    *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
	} else {
	    *w++ = ch;
	}
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return (char *)wString;
#else
    return (char *)Tcl_UtfToUniCharDString(string, len, dsPtr);
#endif
}

char *
Tcl_WinTCharToUtf(
    const char *string,
    int len,
    Tcl_DString *dsPtr)
{
#if TCL_UTF_MAX > 4
    const wchar_t *w, *wEnd;
    char *p, *result;
    int oldLength, blen = 1;
#endif

    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
    if (len < 0) {
	len = wcslen((wchar_t *)string);
    } else {
	len /= 2;
    }
#if TCL_UTF_MAX > 4
    oldLength = Tcl_DStringLength(dsPtr);
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (wchar_t *)string + len;
    for (w = (wchar_t *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	if ((*w >= 0xD800) && (blen < 3)) {
	    /* Indication that high surrogate is handled */
	    blen = 0;
	}
	w++;
    }
    if (!blen) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
#endif
}

#if defined(TCL_WIDE_INT_IS_LONG)
/* On Cygwin64, long is 64-bit while on Win64 long is 32-bit. Therefore
 * we have to make sure that all stub entries on Cygwin64 follow the Win64
 * signature. Tcl 9 must find a better solution, but that cannot be done
 * without introducing a binary incompatibility.







<
<
<
<
<
<
<




<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<








<
<
<
<
<
<









<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<







236
237
238
239
240
241
242







243
244
245
246















































247



248
249
250
251
252
253
254
255






256
257
258
259
260
261
262
263
264




























265

266
267
268
269
270
271
272

char *
Tcl_WinUtfToTChar(
    const char *string,
    int len,
    Tcl_DString *dsPtr)
{







    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }















































    return (char *)Tcl_UtfToWCharDString(string, len, dsPtr);



}

char *
Tcl_WinTCharToUtf(
    const char *string,
    int len,
    Tcl_DString *dsPtr)
{






    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
    if (len < 0) {
	len = wcslen((wchar_t *)string);
    } else {
	len /= 2;
    }




























    return TclWCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);

}

#if defined(TCL_WIDE_INT_IS_LONG)
/* On Cygwin64, long is 64-bit while on Win64 long is 32-bit. Therefore
 * we have to make sure that all stub entries on Cygwin64 follow the Win64
 * signature. Tcl 9 must find a better solution, but that cannot be done
 * without introducing a binary incompatibility.
Changes to generic/tclUtf.c.
262
263
264
265
266
267
268












































269
270
271
272
273
274
275
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}













































/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfToUniChar --
 *
 *	Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8
 *	sequences are converted to valid Tcl_UniChars and processing







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}

#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
char *
TclWCharToUtfDString(
    const WCHAR *uniStr,	/* WCHAR string to convert to UTF-8. */
    int uniLength,		/* Length of WCHAR string in Tcl_UniChars
				 * (must be >= 0). */
    Tcl_DString *dsPtr)		/* UTF-8 representation of string is appended
				 * to this previously initialized DString. */
{
    const WCHAR *w, *wEnd;
    char *p, *string;
    int oldLength, len = 1;

    /*
     * UTF-8 string length in bytes will be <= Unicode string length * 4.
     */

    oldLength = Tcl_DStringLength(dsPtr);
    Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4);
    string = Tcl_DStringValue(dsPtr) + oldLength;

    p = string;
    wEnd = uniStr + uniLength;
    for (w = uniStr; w < wEnd; ) {
	if (!len && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	len = Tcl_UniCharToUtf(*w, p);
	p += len;
	if ((*w >= 0xD800) && (len < 3)) {
	    len = 0; /* Indication that high surrogate was found */
	}
	w++;
    }
    if (!len) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - string));

    return string;
}
#endif
/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfToUniChar --
 *
 *	Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8
 *	sequences are converted to valid Tcl_UniChars and processing
413
414
415
416
417
418
419
420






































































































421
422
423
424
425
426
427
	 * represents itself.
	 */
    }

    *chPtr = byte;
    return 1;
}







































































































/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfToUniCharDString --
 *
 *	Convert the UTF-8 string to Unicode.
 *







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
	 * represents itself.
	 */
    }

    *chPtr = byte;
    return 1;
}

#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
int
TclUtfToWChar(
    const char *src,	/* The UTF-8 string. */
    WCHAR *chPtr)/* Filled with the WCHAR represented by
				 * the UTF-8 string. */
{
    WCHAR byte;

    /*
     * Unroll 1 to 4 byte UTF-8 sequences.
     */

    byte = *((unsigned char *) src);
    if (byte < 0xC0) {
	/*
	 * Handles properly formed UTF-8 characters between 0x01 and 0x7F.
	 * Treats naked trail bytes 0x80 to 0x9F as valid characters from
	 * the cp1252 table. See: <https://en.wikipedia.org/wiki/UTF-8>
	 * Also treats \0 and other naked trail bytes 0xA0 to 0xBF as valid
	 * characters representing themselves.
	 */

	/* If *chPtr contains a high surrogate (produced by a previous
	 * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
	 * bytes, then we must produce a follow-up low surrogate. We only
	 * do that if the high surrogate matches the bits we encounter.
	 */
	if ((byte >= 0x80)
		&& (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
		&& ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
		&& ((src[2] & 0xC0) == 0x80)) {
	    *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
	    return 3;
	}
	if ((unsigned)(byte-0x80) < (unsigned)0x20) {
	    *chPtr = cp1252[byte-0x80];
	} else {
	    *chPtr = byte;
	}
	return 1;
    } else if (byte < 0xE0) {
	if ((src[1] & 0xC0) == 0x80) {
	    /*
	     * Two-byte-character lead-byte followed by a trail-byte.
	     */

	    *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
	    if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
		return 2;
	    }
	}

	/*
	 * A two-byte-character lead-byte not followed by trail-byte
	 * represents itself.
	 */
    } else if (byte < 0xF0) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
	    /*
	     * Three-byte-character lead byte followed by two trail bytes.
	     */

	    *chPtr = (((byte & 0x0F) << 12)
		    | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
	    if (*chPtr > 0x7FF) {
		return 3;
	    }
	}

	/*
	 * A three-byte-character lead-byte not followed by two trail-bytes
	 * represents itself.
	 */
    }
    else if (byte < 0xF8) {
	if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
	    /*
	     * Four-byte-character lead byte followed by three trail bytes.
	     */
	    WCHAR high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
		    | ((src[2] & 0x3F) >> 4)) - 0x40;
	    if (high >= 0x400) {
		/* out of range, < 0x10000 or > 0x10ffff */
	    } else {
		/* produce high surrogate, advance source pointer */
		*chPtr = 0xD800 + high;
		return 1;
	    }
	}

	/*
	 * A four-byte-character lead-byte not followed by three trail-bytes
	 * represents itself.
	 */
    }

    *chPtr = byte;
    return 1;
}
#endif

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfToUniCharDString --
 *
 *	Convert the UTF-8 string to Unicode.
 *
484
485
486
487
488
489
490
491






















































492
493
494
495
496
497
498
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
}























































/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfCharComplete --
 *
 *	Determine if the UTF-8 string of the given length is long enough to be
 *	decoded by Tcl_UtfToUniChar(). This does not ensure that the UTF-8







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
}

#if (TCL_UTF_MAX > 4) && (defined(__CYGWIN__) || defined(_WIN32))
WCHAR *
TclUtfToWCharDString(
    const char *src,		/* UTF-8 string to convert to Unicode. */
    int length,			/* Length of UTF-8 string in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Unicode representation of string is
				 * appended to this previously initialized
				 * DString. */
{
    WCHAR ch = 0, *w, *wString;
    const char *p, *end;
    int oldLength;

    if (length < 0) {
	length = strlen(src);
    }

    /*
     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
     * bytes.
     */

    oldLength = Tcl_DStringLength(dsPtr);

    Tcl_DStringSetLength(dsPtr,
	    oldLength + (int) ((length + 1) * sizeof(WCHAR)));
    wString = (WCHAR *) (Tcl_DStringValue(dsPtr) + oldLength);

    w = wString;
    p = src;
    end = src + length - 4;
    while (p < end) {
	p += TclUtfToWChar(p, &ch);
	*w++ = ch;
    }
    end += 4;
    while (p < end) {
	if (Tcl_UtfCharComplete(p, end-p)) {
	    p += TclUtfToWChar(p, &ch);
	} else if (((UCHAR(*p)-0x80)) < 0x20) {
	    ch = cp1252[UCHAR(*p++)-0x80];
	} else {
	    ch = UCHAR(*p++);
	}
	*w++ = ch;
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
}
#endif
/*
 *---------------------------------------------------------------------------
 *
 * Tcl_UtfCharComplete --
 *
 *	Determine if the UTF-8 string of the given length is long enough to be
 *	decoded by Tcl_UtfToUniChar(). This does not ensure that the UTF-8
Changes to win/tclWin32Dll.c.
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
Tcl_WinUtfToTChar(
    const char *string,		/* Source string in UTF-8. */
    int len,			/* Source string length in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Uninitialized or free DString in which the
				 * converted string is stored. */
{
#if TCL_UTF_MAX > 4
    Tcl_UniChar ch = 0;
    TCHAR *w, *wString;
    const char *p, *end;
    int oldLength;
#endif

    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
#if TCL_UTF_MAX > 4

    if (len < 0) {
	len = strlen(string);
    }

    /*
     * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in
     * bytes.
     */

    oldLength = Tcl_DStringLength(dsPtr);

    Tcl_DStringSetLength(dsPtr,
	    oldLength + (int) ((len + 1) * sizeof(TCHAR)));
    wString = (TCHAR *) (Tcl_DStringValue(dsPtr) + oldLength);

    w = wString;
    p = string;
    end = string + len - 4;
    while (p < end) {
	p += TclUtfToUniChar(p, &ch);
	if (ch > 0xFFFF) {
	    *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
	    *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
	} else {
	    *w++ = ch;
	}
    }
    end += 4;
    while (p < end) {
	if (Tcl_UtfCharComplete(p, end-p)) {
	    p += TclUtfToUniChar(p, &ch);
	} else {
	    ch = UCHAR(*p++);
	}
	if (ch > 0xFFFF) {
	    *w++ = (wchar_t) (0xD800 + ((ch -= 0x10000) >> 10));
	    *w++ = (wchar_t) (0xDC00 | (ch & 0x3FF));
	} else {
	    *w++ = ch;
	}
    }
    *w = '\0';
    Tcl_DStringSetLength(dsPtr,
	    oldLength + ((char *) w - (char *) wString));

    return wString;
#else
    return Tcl_UtfToUniCharDString(string, len, dsPtr);
#endif
}

char *
Tcl_WinTCharToUtf(
    const TCHAR *string,	/* Source string in Unicode. */
    int len,			/* Source string length in bytes, or -1 for
				 * platform-specific string length. */
    Tcl_DString *dsPtr)		/* Uninitialized or free DString in which the
				 * converted string is stored. */
{
#if TCL_UTF_MAX > 4
    const TCHAR *w, *wEnd;
    char *p, *result;
    int oldLength, blen = 1;
#endif

    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
    if (len < 0) {
	len = wcslen((TCHAR *)string);
    } else {
	len /= 2;
    }
#if TCL_UTF_MAX > 4
    oldLength = Tcl_DStringLength(dsPtr);
    Tcl_DStringSetLength(dsPtr, oldLength + (len + 1) * 4);
    result = Tcl_DStringValue(dsPtr) + oldLength;

    p = result;
    wEnd = (TCHAR *)string + len;
    for (w = (TCHAR *)string; w < wEnd; ) {
	if (!blen && ((*w & 0xFC00) != 0xDC00)) {
	    /* Special case for handling high surrogates. */
	    p += Tcl_UniCharToUtf(-1, p);
	}
	blen = Tcl_UniCharToUtf(*w, p);
	p += blen;
	if ((*w >= 0xD800) && (blen < 3)) {
	    /* Indication that high surrogate is handled */
	    blen = 0;
	}
	w++;
    }
    if (!blen) {
	/* Special case for handling high surrogates. */
	p += Tcl_UniCharToUtf(-1, p);
    }
    Tcl_DStringSetLength(dsPtr, oldLength + (p - result));

    return result;
#else
    return Tcl_UniCharToUtfDString((Tcl_UniChar *)string, len, dsPtr);
#endif
}

/*
 *------------------------------------------------------------------------
 *
 * TclWinCPUID --
 *







<
<
<
<
<
<
<




<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<




|





<
<
<
<
<
<





|



<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







467
468
469
470
471
472
473







474
475
476
477

















































478

479
480
481
482
483
484
485
486
487
488






489
490
491
492
493
494
495
496
497




498

























499
500
501
502
503
504
505
Tcl_WinUtfToTChar(
    const char *string,		/* Source string in UTF-8. */
    int len,			/* Source string length in bytes, or -1 for
				 * strlen(). */
    Tcl_DString *dsPtr)		/* Uninitialized or free DString in which the
				 * converted string is stored. */
{







    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }

















































    return TclUtfToWCharDString(string, len, dsPtr);

}

char *
Tcl_WinTCharToUtf(
    const WCHAR *string,	/* Source string in Unicode. */
    int len,			/* Source string length in bytes, or -1 for
				 * platform-specific string length. */
    Tcl_DString *dsPtr)		/* Uninitialized or free DString in which the
				 * converted string is stored. */
{






    Tcl_DStringInit(dsPtr);
    if (!string) {
	return NULL;
    }
    if (len < 0) {
	len = wcslen((WCHAR *)string);
    } else {
	len /= 2;
    }




    return TclWCharToUtfDString((unsigned short *)string, len, dsPtr);

























}

/*
 *------------------------------------------------------------------------
 *
 * TclWinCPUID --
 *
Changes to win/tclWinFCmd.c.
1520
1521
1522
1523
1524
1525
1526

1527
1528
1529
1530
1531
1532
1533
1534
1535
	 * It is hidden. However there is a bug on some Windows OSes in which
	 * root volumes (drives) formatted as NTFS are declared hidden when
	 * they are not (and cannot be).
	 *
	 * We test for, and fix that case, here.
	 */


	const char *str = TclGetString(fileName);
	size_t len = fileName->length;

	if (len < 4) {
	    if (len == 0) {
		/*
		 * Not sure if this is possible, but we pass it on anyway.
		 */
	    } else if (len == 1 && (str[0] == '/' || str[0] == '\\')) {







>
|
<







1520
1521
1522
1523
1524
1525
1526
1527
1528

1529
1530
1531
1532
1533
1534
1535
	 * It is hidden. However there is a bug on some Windows OSes in which
	 * root volumes (drives) formatted as NTFS are declared hidden when
	 * they are not (and cannot be).
	 *
	 * We test for, and fix that case, here.
	 */

	int len;
	const char *str = TclGetStringFromObj(fileName, &len);


	if (len < 4) {
	    if (len == 0) {
		/*
		 * Not sure if this is possible, but we pass it on anyway.
		 */
	    } else if (len == 1 && (str[0] == '/' || str[0] == '\\')) {
1606
1607
1608
1609
1610
1611
1612

1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
     */

    Tcl_IncrRefCount(splitPath);

    for (i = 0; i < pathc; i++) {
	Tcl_Obj *elt;
	char *pathv;


	Tcl_ListObjIndex(NULL, splitPath, i, &elt);

	pathv = TclGetString(elt);
	if ((pathv[0] == '/') || ((elt->length == 3) && (pathv[1] == ':'))
		|| (strcmp(pathv, ".") == 0) || (strcmp(pathv, "..") == 0)) {
	    /*
	     * Handle "/", "//machine/export", "c:/", "." or ".." by just
	     * copying the string literally.  Uppercase the drive letter, just
	     * because it looks better under Windows to do so.
	     */








>



|
|







1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
     */

    Tcl_IncrRefCount(splitPath);

    for (i = 0; i < pathc; i++) {
	Tcl_Obj *elt;
	char *pathv;
	int length;

	Tcl_ListObjIndex(NULL, splitPath, i, &elt);

	pathv = TclGetStringFromObj(elt, &length);
	if ((pathv[0] == '/') || ((length == 3) && (pathv[1] == ':'))
		|| (strcmp(pathv, ".") == 0) || (strcmp(pathv, "..") == 0)) {
	    /*
	     * Handle "/", "//machine/export", "c:/", "." or ".." by just
	     * copying the string literally.  Uppercase the drive letter, just
	     * because it looks better under Windows to do so.
	     */

1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
	    Tcl_IncrRefCount(tempPath);

	    /*
	     * We'd like to call Tcl_FSGetNativePath(tempPath) but that is
	     * likely to lead to infinite loops.
	     */

	    tempString = TclGetString(tempPath);
	    nativeName = Tcl_WinUtfToTChar(tempString, tempPath->length, &ds);
	    Tcl_DecrRefCount(tempPath);
	    handle = FindFirstFile(nativeName, &data);
	    if (handle == INVALID_HANDLE_VALUE) {
		/*
		 * FindFirstFile() doesn't like root directories. We would
		 * only get a root directory here if the caller specified "c:"
		 * or "c:." and the current directory on the drive was the







|
|







1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
	    Tcl_IncrRefCount(tempPath);

	    /*
	     * We'd like to call Tcl_FSGetNativePath(tempPath) but that is
	     * likely to lead to infinite loops.
	     */

	    tempString = TclGetStringFromObj(tempPath, &length);
	    nativeName = Tcl_WinUtfToTChar(tempString, length, &ds);
	    Tcl_DecrRefCount(tempPath);
	    handle = FindFirstFile(nativeName, &data);
	    if (handle == INVALID_HANDLE_VALUE) {
		/*
		 * FindFirstFile() doesn't like root directories. We would
		 * only get a root directory here if the caller specified "c:"
		 * or "c:." and the current directory on the drive was the
Changes to win/tclWinFile.c.
1460
1461
1462
1463
1464
1465
1466

1467
1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
		Tcl_JoinPath(1, &ptr, bufferPtr);
		rc = 1;
		result = Tcl_DStringValue(bufferPtr);
	    }
	}
	Tcl_DStringFree(&ds);
    } else {

	wName = Tcl_WinUtfToTChar(domain + 1, -1, &ds);
	rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain);
	Tcl_DStringFree(&ds);
	nameLen = domain - name;
    }
    if (rc == 0) {

	wName = Tcl_WinUtfToTChar(name, nameLen, &ds);
	while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) {
	    /*
	     * user does not exists - if domain was not specified,
	     * try again using current domain.
	     */
	    rc = 1;
	    if (domain != NULL) break;
	    /* get current domain */
	    rc = NetGetDCName(NULL, NULL, (LPBYTE *) &wDomain);
	    if (rc != 0) break;
	    domain = INT2PTR(-1); /* repeat once */
	}
	if (rc == 0) {
	    DWORD i, size = MAX_PATH;
	    wHomeDir = uiPtr->usri1_home_dir;
	    if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) {
		size = lstrlenW(wHomeDir);
		Tcl_WinTCharToUtf((TCHAR *) wHomeDir, size * sizeof(WCHAR), bufferPtr);
	    } else {
		/*
		 * User exists but has no home dir. Return
		 * "{GetProfilesDirectory}/<user>".
		 */
		GetProfilesDirectoryW(buf, &size);
		Tcl_WinTCharToUtf(buf, (size-1) * sizeof(WCHAR), bufferPtr);
		Tcl_DStringAppend(bufferPtr, "/", 1);
		Tcl_DStringAppend(bufferPtr, name, nameLen);
	    }
	    result = Tcl_DStringValue(bufferPtr);
	    /* be sure we return normalized path */
	    for (i = 0; i < size; ++i){
		if (result[i] == '\\') result[i] = '/';







>
|





>
|

















|






|







1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
		Tcl_JoinPath(1, &ptr, bufferPtr);
		rc = 1;
		result = Tcl_DStringValue(bufferPtr);
	    }
	}
	Tcl_DStringFree(&ds);
    } else {
	Tcl_DStringInit(&ds);
	wName = TclUtfToWCharDString(domain + 1, -1, &ds);
	rc = NetGetDCName(NULL, wName, (LPBYTE *) &wDomain);
	Tcl_DStringFree(&ds);
	nameLen = domain - name;
    }
    if (rc == 0) {
	Tcl_DStringInit(&ds);
	wName = TclUtfToWCharDString(name, nameLen, &ds);
	while (NetUserGetInfo(wDomain, wName, 1, (LPBYTE *) &uiPtr) != 0) {
	    /*
	     * user does not exists - if domain was not specified,
	     * try again using current domain.
	     */
	    rc = 1;
	    if (domain != NULL) break;
	    /* get current domain */
	    rc = NetGetDCName(NULL, NULL, (LPBYTE *) &wDomain);
	    if (rc != 0) break;
	    domain = INT2PTR(-1); /* repeat once */
	}
	if (rc == 0) {
	    DWORD i, size = MAX_PATH;
	    wHomeDir = uiPtr->usri1_home_dir;
	    if ((wHomeDir != NULL) && (wHomeDir[0] != L'\0')) {
		size = lstrlenW(wHomeDir);
		TclWCharToUtfDString(wHomeDir, size, bufferPtr);
	    } else {
		/*
		 * User exists but has no home dir. Return
		 * "{GetProfilesDirectory}/<user>".
		 */
		GetProfilesDirectoryW(buf, &size);
		TclWCharToUtfDString(buf, size-1, bufferPtr);
		Tcl_DStringAppend(bufferPtr, "/", 1);
		Tcl_DStringAppend(bufferPtr, name, nameLen);
	    }
	    result = Tcl_DStringValue(bufferPtr);
	    /* be sure we return normalized path */
	    for (i = 0; i < size; ++i){
		if (result[i] == '\\') result[i] = '/';
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
    } else {
	/*
	 * Path of form C:foo/bar, but this only makes sense if the cwd is
	 * also on drive C.
	 */

	int cwdLen;
	const char *drive =
		TclGetStringFromObj(useThisCwd, &cwdLen);
	char drive_cur = path[0];

	if (drive_cur >= 'a') {
	    drive_cur -= ('a' - 'A');
	}
	if (drive[0] == drive_cur) {
	    absolutePath = Tcl_DuplicateObj(useThisCwd);







<
|







2840
2841
2842
2843
2844
2845
2846

2847
2848
2849
2850
2851
2852
2853
2854
    } else {
	/*
	 * Path of form C:foo/bar, but this only makes sense if the cwd is
	 * also on drive C.
	 */

	int cwdLen;

	const char *drive = TclGetStringFromObj(useThisCwd, &cwdLen);
	char drive_cur = path[0];

	if (drive_cur >= 'a') {
	    drive_cur -= ('a' - 'A');
	}
	if (drive[0] == drive_cur) {
	    absolutePath = Tcl_DuplicateObj(useThisCwd);
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
ClientData
TclNativeCreateNativeRep(
    Tcl_Obj *pathPtr)
{
    WCHAR *nativePathPtr = NULL;
    const char *str;
    Tcl_Obj *validPathPtr;
    size_t len;
    WCHAR *wp;

    if (TclFSCwdIsNative()) {
	/*
	 * The cwd is native, which means we can use the translated path
	 * without worrying about normalization (this will also usually be
	 * shorter so the utf-to-external conversion will be somewhat faster).







|







2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
ClientData
TclNativeCreateNativeRep(
    Tcl_Obj *pathPtr)
{
    WCHAR *nativePathPtr = NULL;
    const char *str;
    Tcl_Obj *validPathPtr;
    int len;
    WCHAR *wp;

    if (TclFSCwdIsNative()) {
	/*
	 * The cwd is native, which means we can use the translated path
	 * without worrying about normalization (this will also usually be
	 * shorter so the utf-to-external conversion will be somewhat faster).
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
	if (validPathPtr == NULL) {
	    return NULL;
	}
	/* validPathPtr returned from Tcl_FSGetNormalizedPath is owned by Tcl, so incr refCount here */
	Tcl_IncrRefCount(validPathPtr);
    }

    str = Tcl_GetString(validPathPtr);
    len = validPathPtr->length;

    if (strlen(str)!=(unsigned int)len) {
	/* String contains NUL-bytes. This is invalid. */
	goto done;
    }
    /* For a reserved device, strip a possible postfix ':' */
    len = WinIsReserved(str);
    if (len == 0) {
	/* Let MultiByteToWideChar check for other invalid sequences, like







|
<

|







3003
3004
3005
3006
3007
3008
3009
3010

3011
3012
3013
3014
3015
3016
3017
3018
3019
	if (validPathPtr == NULL) {
	    return NULL;
	}
	/* validPathPtr returned from Tcl_FSGetNormalizedPath is owned by Tcl, so incr refCount here */
	Tcl_IncrRefCount(validPathPtr);
    }

    str = Tcl_GetStringFromObj(validPathPtr, &len);


    if (strlen(str)!=(size_t)len) {
	/* String contains NUL-bytes. This is invalid. */
	goto done;
    }
    /* For a reserved device, strip a possible postfix ':' */
    len = WinIsReserved(str);
    if (len == 0) {
	/* Let MultiByteToWideChar check for other invalid sequences, like
Changes to win/tclWinInit.c.
184
185
186
187
188
189
190

191
192
193
194
195
196
197
    unsigned int *lengthPtr,
    Tcl_Encoding *encodingPtr)
{
#define LIBRARY_SIZE	    64
    Tcl_Obj *pathPtr;
    char installLib[LIBRARY_SIZE];
    const char *bytes;


    pathPtr = Tcl_NewObj();

    /*
     * Initialize the substring used when locating the script library. The
     * installLib variable computes the script library path relative to the
     * installed DLL.







>







184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
    unsigned int *lengthPtr,
    Tcl_Encoding *encodingPtr)
{
#define LIBRARY_SIZE	    64
    Tcl_Obj *pathPtr;
    char installLib[LIBRARY_SIZE];
    const char *bytes;
    int length;

    pathPtr = Tcl_NewObj();

    /*
     * Initialize the substring used when locating the script library. The
     * installLib variable computes the script library path relative to the
     * installed DLL.
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
     * Look for the library in its source checkout location.
     */

    Tcl_ListObjAppendElement(NULL, pathPtr,
	    TclGetProcessGlobalValue(&sourceLibraryDir));

    *encodingPtr = NULL;
    bytes = TclGetString(pathPtr);
    *lengthPtr = pathPtr->length;
    *valuePtr = ckalloc(*lengthPtr + 1);
    memcpy(*valuePtr, bytes, *lengthPtr + 1);
    Tcl_DecrRefCount(pathPtr);
}

/*
 *---------------------------------------------------------------------------
 *
 * AppendEnvironment --







|
|
|
|







220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
     * Look for the library in its source checkout location.
     */

    Tcl_ListObjAppendElement(NULL, pathPtr,
	    TclGetProcessGlobalValue(&sourceLibraryDir));

    *encodingPtr = NULL;
    bytes = TclGetStringFromObj(pathPtr, &length);
    *lengthPtr = length++;
    *valuePtr = ckalloc(length);
    memcpy(*valuePtr, bytes, length);
    Tcl_DecrRefCount(pathPtr);
}

/*
 *---------------------------------------------------------------------------
 *
 * AppendEnvironment --
Changes to win/tclWinPipe.c.
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
    namePtr = (char *) name;
    length = GetTempPath(MAX_PATH, name);
    if (length == 0) {
	goto gotError;
    }
    namePtr += length * sizeof(TCHAR);
    if (basenameObj) {
	const char *string = Tcl_GetString(basenameObj);

	Tcl_WinUtfToTChar(string, basenameObj->length, &buf);
	memcpy(namePtr, Tcl_DStringValue(&buf), Tcl_DStringLength(&buf));
	namePtr += Tcl_DStringLength(&buf);
	Tcl_DStringFree(&buf);
    } else {
	const TCHAR *baseStr = TEXT("TCL");
	int length = 3 * sizeof(TCHAR);

	memcpy(namePtr, baseStr, length);
	namePtr += length;
    }
    counter = TclpGetClicks() % 65533;
    counter2 = 1024;			/* Only try this many times! Prevents
					 * an infinite loop. */







|

|





|







3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
    namePtr = (char *) name;
    length = GetTempPath(MAX_PATH, name);
    if (length == 0) {
	goto gotError;
    }
    namePtr += length * sizeof(TCHAR);
    if (basenameObj) {
	const char *string = TclGetStringFromObj(basenameObj, &length);

	Tcl_WinUtfToTChar(string, length, &buf);
	memcpy(namePtr, Tcl_DStringValue(&buf), Tcl_DStringLength(&buf));
	namePtr += Tcl_DStringLength(&buf);
	Tcl_DStringFree(&buf);
    } else {
	const TCHAR *baseStr = TEXT("TCL");
	length = 3 * sizeof(TCHAR);

	memcpy(namePtr, baseStr, length);
	namePtr += length;
    }
    counter = TclpGetClicks() % 65533;
    counter2 = 1024;			/* Only try this many times! Prevents
					 * an infinite loop. */