Tcl Source Code

Check-in [702b3410f7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge 8.7
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk | main
Files: files | file ages | folders
SHA3-256: 702b3410f77b9e3a4bfcb2d069468b57c2fcbdcec0214998f10e6363e2c60fcd
User & Date: jan.nijtmans 2021-03-18 09:13:22.739
Context
2021-03-18
09:52
Merge 8.7 check-in: 587db5d2a3 user: jan.nijtmans tags: trunk, main
09:13
Merge 8.7 check-in: 702b3410f7 user: jan.nijtmans tags: trunk, main
09:12
More simplification of UtfToUtfProc(). Get rid of wrapper functions UtfIntToUtfExtProc/UtfExtToUtfIn... check-in: 5e32efda50 user: jan.nijtmans tags: core-8-branch
2021-03-17
15:05
Merge 8.7 check-in: 341e1e54c4 user: jan.nijtmans tags: trunk, main
Changes
Unified Diff Ignore Whitespace Patch
Changes to generic/tclEncoding.c.
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
static Tcl_EncodingFreeProc	TableFreeProc;
static Tcl_EncodingConvertProc	TableFromUtfProc;
static Tcl_EncodingConvertProc	TableToUtfProc;
static size_t			unilen(const char *src);
static Tcl_EncodingConvertProc	Utf16ToUtfProc;
static Tcl_EncodingConvertProc	UtfToUtf16Proc;
static Tcl_EncodingConvertProc	UtfToUcs2Proc;
static int              	UtfToUtfProc(ClientData clientData,
	                            const char *src, int srcLen, int flags,
       		                    Tcl_EncodingState *statePtr, char *dst,
				    int dstLen, int *srcReadPtr,
				    int *dstWrotePtr, int *dstCharsPtr,
				    int pureNullMode);
static Tcl_EncodingConvertProc	UtfIntToUtfExtProc;
static Tcl_EncodingConvertProc	UtfExtToUtfIntProc;
static Tcl_EncodingConvertProc	Iso88591FromUtfProc;
static Tcl_EncodingConvertProc	Iso88591ToUtfProc;

/*
 * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
 * of the intrep. This should help the lifetime of encodings be more useful.
 * See concerns raised in [Bug 1077262].







<
<
<
<
<
<
|
<







216
217
218
219
220
221
222






223

224
225
226
227
228
229
230
static Tcl_EncodingFreeProc	TableFreeProc;
static Tcl_EncodingConvertProc	TableFromUtfProc;
static Tcl_EncodingConvertProc	TableToUtfProc;
static size_t			unilen(const char *src);
static Tcl_EncodingConvertProc	Utf16ToUtfProc;
static Tcl_EncodingConvertProc	UtfToUtf16Proc;
static Tcl_EncodingConvertProc	UtfToUcs2Proc;






static Tcl_EncodingConvertProc	UtfToUtfProc;

static Tcl_EncodingConvertProc	Iso88591FromUtfProc;
static Tcl_EncodingConvertProc	Iso88591ToUtfProc;

/*
 * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
 * of the intrep. This should help the lifetime of encodings be more useful.
 * See concerns raised in [Bug 1077262].
513
514
515
516
517
518
519




520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
 *
 * Side effects:
 *	Depends on the memory, object, and IO subsystems.
 *
 *---------------------------------------------------------------------------
 */





void
TclInitEncodingSubsystem(void)
{
    Tcl_EncodingType type;
    TableEncodingData *dataPtr;
    unsigned size;
    unsigned short i;
    union {
        char c;
        short s;
    } isLe;

    if (encodingsInitialized) {
	return;
    }

    isLe.s = 1;
    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
     * no-op because it turns a stream of improperly formed UTF-8 into a
     * properly formed stream.
     */

    type.encodingName	= NULL;
    type.toUtfProc	= BinaryProc;
    type.fromUtfProc	= BinaryProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    tclIdentityEncoding = Tcl_CreateEncoding(&type);

    type.encodingName	= "utf-8";
    type.toUtfProc	= UtfExtToUtfIntProc;
    type.fromUtfProc	= UtfIntToUtfExtProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUcs2Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "ucs-2le";
    type.clientData	= INT2PTR(1);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUtf16Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "utf-16le";
    type.clientData	= INT2PTR(1);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);







>
>
>
>








|







|



















|
|










|













|







506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
 *
 * Side effects:
 *	Depends on the memory, object, and IO subsystems.
 *
 *---------------------------------------------------------------------------
 */

/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
#define TCL_ENCODING_LE		0x40	/* Little-endian encoding */
#define TCL_ENCODING_EXTERNAL 0x80	/* Converting from internal to external variant */

void
TclInitEncodingSubsystem(void)
{
    Tcl_EncodingType type;
    TableEncodingData *dataPtr;
    unsigned size;
    unsigned short i;
    union {
        unsigned char c;
        short s;
    } isLe;

    if (encodingsInitialized) {
	return;
    }

    isLe.s = TCL_ENCODING_LE;
    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings.  UTF-8 to UTF-8 translation is not a
     * no-op because it turns a stream of improperly formed UTF-8 into a
     * properly formed stream.
     */

    type.encodingName	= NULL;
    type.toUtfProc	= BinaryProc;
    type.fromUtfProc	= BinaryProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    tclIdentityEncoding = Tcl_CreateEncoding(&type);

    type.encodingName	= "utf-8";
    type.toUtfProc	= UtfToUtfProc;
    type.fromUtfProc	= UtfToUtfProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUcs2Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "ucs-2le";
    type.clientData	= INT2PTR(TCL_ENCODING_LE);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "ucs-2";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);

    type.toUtfProc	= Utf16ToUtfProc;
    type.fromUtfProc    = UtfToUtf16Proc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.encodingName   = "utf-16le";
    type.clientData	= INT2PTR(TCL_ENCODING_LE);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16be";
    type.clientData	= INT2PTR(0);
    Tcl_CreateEncoding(&type);
    type.encodingName   = "utf-16";
    type.clientData	= INT2PTR(isLe.c);
    Tcl_CreateEncoding(&type);
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
	srcLen = 0;
    } else if (srcLen == TCL_INDEX_NONE) {
	srcLen = strlen(src);
    }
    flags = TCL_ENCODING_START | TCL_ENCODING_END;
    while (1) {
	result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
		&dstChars);
	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);

	if (result != TCL_CONVERT_NOSPACE) {
	    if (encodingPtr->nullSize == 2) {
		Tcl_DStringSetLength(dstPtr, soFar + 1);
	    }







|







1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
	srcLen = 0;
    } else if (srcLen == TCL_INDEX_NONE) {
	srcLen = strlen(src);
    }
    flags = TCL_ENCODING_START | TCL_ENCODING_END;
    while (1) {
	result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
		srcLen, flags | TCL_ENCODING_EXTERNAL, &state, dst, dstLen, &srcRead, &dstWrote,
		&dstChars);
	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);

	if (result != TCL_CONVERT_NOSPACE) {
	    if (encodingPtr->nullSize == 2) {
		Tcl_DStringSetLength(dstPtr, soFar + 1);
	    }
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
    }
    if (dstCharsPtr == NULL) {
	dstCharsPtr = &dstChars;
    }

    dstLen -= encodingPtr->nullSize;
    result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen,
	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
	    dstCharsPtr);
    if (encodingPtr->nullSize == 2) {
	dst[*dstWrotePtr + 1] = '\0';
    }
    dst[*dstWrotePtr] = '\0';

    return result;







|







1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
    }
    if (dstCharsPtr == NULL) {
	dstCharsPtr = &dstChars;
    }

    dstLen -= encodingPtr->nullSize;
    result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen,
	    flags | TCL_ENCODING_EXTERNAL, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
	    dstCharsPtr);
    if (encodingPtr->nullSize == 2) {
	dst[*dstWrotePtr + 1] = '\0';
    }
    dst[*dstWrotePtr] = '\0';

    return result;
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
    memcpy(dst, src, srcLen);
    return result;
}

/*
 *-------------------------------------------------------------------------
 *
 * UtfIntToUtfExtProc --
 *
 *	Convert from UTF-8 to UTF-8. While converting null-bytes from the
 *	Tcl's internal representation (0xC0, 0x80) to the official
 *	representation (0x00). See UtfToUtfProc for details.
 *
 * Results:
 *	Returns TCL_OK if conversion was successful.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfIntToUtfExtProc(
    ClientData clientData,
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
				 * routine under control of flags argument. */
    char *dst,			/* Output buffer in which converted string
				 * is stored. */
    int dstLen,			/* The maximum length of output buffer in
				 * bytes. */
    int *srcReadPtr,		/* Filled with the number of bytes from the
				 * source string that were converted. This may
				 * be less than the original source length if
				 * there was a problem converting some source
				 * characters. */
    int *dstWrotePtr,		/* Filled with the number of bytes that were
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
	    srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
}

/*
 *-------------------------------------------------------------------------
 *
 * UtfExtToUtfIntProc --
 *
 *	Convert from UTF-8 to UTF-8 while converting null-bytes from the
 *	official representation (0x00) to Tcl's internal representation (0xC0,
 *	0x80). See UtfToUtfProc for details.
 *
 * Results:
 *	Returns TCL_OK if conversion was successful.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfExtToUtfIntProc(
    ClientData clientData,
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
				 * information used during a piecewise
				 * conversion. Contents of statePtr are
				 * initialized and/or reset by conversion
				 * routine under control of flags argument. */
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in
				 * bytes. */
    int *srcReadPtr,		/* Filled with the number of bytes from the
				 * source string that were converted. This may
				 * be less than the original source length if
				 * there was a problem converting some source
				 * characters. */
    int *dstWrotePtr,		/* Filled with the number of bytes that were
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
{
    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
	    srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
}

/*
 *-------------------------------------------------------------------------
 *
 * UtfToUtfProc --
 *
 *	Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
 *	is not a no-op, because it will turn a stream of improperly formed
 *	UTF-8 into a properly formed stream.
 *
 * Results:







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2087
2088
2089
2090
2091
2092
2093


































































































2094
2095
2096
2097
2098
2099
2100
    memcpy(dst, src, srcLen);
    return result;
}

/*
 *-------------------------------------------------------------------------
 *


































































































 * UtfToUtfProc --
 *
 *	Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
 *	is not a no-op, because it will turn a stream of improperly formed
 *	UTF-8 into a properly formed stream.
 *
 * Results:
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
				 * source string that were converted. This may
				 * be less than the original source length if
				 * there was a problem converting some source
				 * characters. */
    int *dstWrotePtr,		/* Filled with the number of bytes that were
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr,		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */
    int pureNullMode)		/* Convert embedded nulls from internal
				 * representation to real null-bytes or vice
				 * versa. Also combine or separate surrogate pairs */
{
    const char *srcStart, *srcEnd, *srcClose;
    const char *dstStart, *dstEnd;
    int result, numChars, charLimit = INT_MAX;
    int ch;

    result = TCL_OK;







|


<
<
<







2121
2122
2123
2124
2125
2126
2127
2128
2129
2130



2131
2132
2133
2134
2135
2136
2137
				 * source string that were converted. This may
				 * be less than the original source length if
				 * there was a problem converting some source
				 * characters. */
    int *dstWrotePtr,		/* Filled with the number of bytes that were
				 * stored in the output buffer as a result of
				 * the conversion. */
    int *dstCharsPtr)		/* Filled with the number of characters that
				 * correspond to the bytes stored in the
				 * output buffer. */



{
    const char *srcStart, *srcEnd, *srcClose;
    const char *dstStart, *dstEnd;
    int result, numChars, charLimit = INT_MAX;
    int ch;

    result = TCL_OK;
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
	    result = TCL_CONVERT_MULTIBYTE;
	    break;
	}
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) {
	    /*
	     * Copy 7bit characters, but skip null-bytes when we are in input
	     * mode, so that they get converted to 0xC080.
	     */

	    *dst++ = *src++;
	} else if (pureNullMode == 1 && UCHAR(*src) == 0xC0 &&
		(src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) {
	    /*
	     * Convert 0xC080 to real nulls when we are in output mode.
	     */

	    *dst++ = 0;
	    src += 2;
	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {







|






|
|







2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
	    result = TCL_CONVERT_MULTIBYTE;
	    break;
	}
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}
	if ((UCHAR(*src - 1) < 0x7F) && !(flags & TCL_ENCODING_EXTERNAL)) {
	    /*
	     * Copy 7bit characters, but skip null-bytes when we are in input
	     * mode, so that they get converted to 0xC080.
	     */

	    *dst++ = *src++;
	} else if (UCHAR(*src) == 0xC0 && (src + 1 < srcEnd)
		&& UCHAR(src[1]) == 0x80 && (flags & TCL_ENCODING_EXTERNAL)) {
	    /*
	     * Convert 0xC080 to real nulls when we are in output mode.
	     */

	    *dst++ = 0;
	    src += 2;
	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
Utf16ToUtfProc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
    const char *src,		/* Source string in Unicode. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    TCL_UNUSED(Tcl_EncodingState *),
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in







|







2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
Utf16ToUtfProc(
    ClientData clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
    const char *src,		/* Source string in Unicode. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    TCL_UNUSED(Tcl_EncodingState *),
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in
2358
2359
2360
2361
2362
2363
2364

2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;
    unsigned short ch;


    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;

    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
    if ((srcLen % 2) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen--;
    }
    /* If last code point is a high surrogate, we cannot handle that yet */
    if ((srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen-= 2;
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	if (clientData) {
	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
	} else {
	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
	}
	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.







>











|
















|







2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
				 * output buffer. */
{
    const char *srcStart, *srcEnd;
    const char *dstEnd, *dstStart;
    int result, numChars, charLimit = INT_MAX;
    unsigned short ch;

    flags |= PTR2INT(clientData);
    if (flags & TCL_ENCODING_CHAR_LIMIT) {
	charLimit = *dstCharsPtr;
    }
    result = TCL_OK;

    /* check alignment with utf-16 (2 == sizeof(UTF-16)) */
    if ((srcLen % 2) != 0) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen--;
    }
    /* If last code point is a high surrogate, we cannot handle that yet */
    if ((srcLen >= 2) && ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) {
	result = TCL_CONVERT_MULTIBYTE;
	srcLen-= 2;
    }

    srcStart = src;
    srcEnd = src + srcLen;

    dstStart = dst;
    dstEnd = dst + dstLen - TCL_UTF_MAX;

    for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
	if (dst > dstEnd) {
	    result = TCL_CONVERT_NOSPACE;
	    break;
	}

	if (flags & TCL_ENCODING_LE) {
	    ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
	} else {
	    ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
	}
	/*
	 * Special case for 1-byte utf chars for speed. Make sure we work with
	 * unsigned short-size data.
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUcs2Proc(
    ClientData clientData,	/* != NULL means LE, == NUL means BE */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    TCL_UNUSED(Tcl_EncodingState *),
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in







|







2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
 *	None.
 *
 *-------------------------------------------------------------------------
 */

static int
UtfToUcs2Proc(
    ClientData clientData,	/* additional flags, e.g. TCL_ENCODING_LE */
    const char *src,		/* Source string in UTF-8. */
    int srcLen,			/* Source string length in bytes. */
    int flags,			/* Conversion control flags. */
    TCL_UNUSED(Tcl_EncodingState *),
    char *dst,			/* Output buffer in which converted string is
				 * stored. */
    int dstLen,			/* The maximum length of output buffer in
2552
2553
2554
2555
2556
2557
2558

2559
2560
2561
2562
2563
2564
2565
    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
    int result, numChars;
#if TCL_UTF_MAX <= 3
    int len;
#endif
    Tcl_UniChar ch = 0;


    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
    if ((flags & TCL_ENCODING_END) == 0) {
	srcClose -= TCL_UTF_MAX;
    }








>







2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
    const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
    int result, numChars;
#if TCL_UTF_MAX <= 3
    int len;
#endif
    Tcl_UniChar ch = 0;

    flags |= PTR2INT(clientData);
    srcStart = src;
    srcEnd = src + srcLen;
    srcClose = srcEnd;
    if ((flags & TCL_ENCODING_END) == 0) {
	srcClose -= TCL_UTF_MAX;
    }

2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
#endif

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (clientData) {
	    *dst++ = (ch & 0xFF);
	    *dst++ = (ch >> 8);
	} else {
	    *dst++ = (ch >> 8);
	    *dst++ = (ch & 0xFF);
	}
    }







|







2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
#endif

	/*
	 * Need to handle this in a way that won't cause misalignment by
	 * casting dst to a Tcl_UniChar. [Bug 1122671]
	 */

	if (flags & TCL_ENCODING_LE) {
	    *dst++ = (ch & 0xFF);
	    *dst++ = (ch >> 8);
	} else {
	    *dst++ = (ch >> 8);
	    *dst++ = (ch & 0xFF);
	}
    }