Index: generic/tkSelect.c ================================================================== --- generic/tkSelect.c +++ generic/tkSelect.c @@ -24,11 +24,11 @@ Tcl_Interp *interp; /* Interpreter in which to invoke command. */ int cmdLength; /* # of non-NULL bytes in command. */ int charOffset; /* The offset of the next char to retrieve. */ int byteOffset; /* The expected byte offset of the next * chunk. */ - char buffer[TCL_UTF_MAX]; /* A buffer to hold part of a UTF character + char buffer[4]; /* A buffer to hold part of a UTF character * that is split across chunks. */ char command[1]; /* Command to invoke. Actual space is * allocated as large as necessary. This must * be the last entry in the structure. */ } CommandInfo; Index: generic/tkUtil.c ================================================================== --- generic/tkUtil.c +++ generic/tkUtil.c @@ -1185,11 +1185,11 @@ event.general.xany.send_event = False; event.general.xany.window = Tk_WindowId(target); event.general.xany.display = Tk_Display(target); event.virtual.name = Tk_GetUid(eventName); if (detail != NULL) { - event.virtual.user_data = detail; + event.virtual.user_data = detail; } Tk_QueueWindowEvent(&event.general, TCL_QUEUE_TAIL); } Index: macosx/tkMacOSXClipboard.c ================================================================== --- macosx/tkMacOSXClipboard.c +++ macosx/tkMacOSXClipboard.c @@ -26,26 +26,38 @@ { NSMutableString *string = [NSMutableString new]; if (dispPtr && dispPtr->clipboardActive && [type isEqualToString:NSStringPboardType]) { + Tcl_Encoding utf8 = Tcl_GetEncoding(NULL, "utf-8"); + Tcl_DString ds; + + Tcl_DStringInit(&ds); for (TkClipboardTarget *targetPtr = dispPtr->clipTargetPtr; targetPtr; targetPtr = targetPtr->nextPtr) { if (targetPtr->type == XA_STRING || targetPtr->type == dispPtr->utf8Atom) { for (TkClipboardBuffer *cbPtr = targetPtr->firstBufferPtr; cbPtr; cbPtr = cbPtr->nextPtr) { - NSString *s = [[NSString alloc] initWithBytesNoCopy: - cbPtr->buffer 
length:cbPtr->length + char *p = Tcl_UtfToExternalDString(utf8, cbPtr->buffer, + cbPtr->length, &ds); + int len = Tcl_DStringLength(&ds); + NSString *s = [[NSString alloc] initWithBytesNoCopy:p + length:len encoding:NSUTF8StringEncoding freeWhenDone:NO]; - [string appendString:s]; - [s release]; + if (s) { + [string appendString:s]; + [s release]; + } + Tcl_DStringFree(&ds); /* next conversion re-inits ds */ } break; } } + Tcl_DStringFree(&ds); + Tcl_FreeEncoding(utf8); } [sender setString:string forType:type]; [string release]; } @@ -129,15 +141,24 @@ || target == dispPtr->utf8Atom)) { NSString *string = nil; NSPasteboard *pb = [NSPasteboard generalPasteboard]; NSString *type = [pb availableTypeFromArray:[NSArray arrayWithObject: NSStringPboardType]]; + Tcl_DString ds; if (type) { string = [pb stringForType:type]; } - result = proc(clientData, interp, string ? [string UTF8String] : ""); + Tcl_DStringInit(&ds); + if (string) { + Tcl_Encoding utf8 = Tcl_GetEncoding(NULL, "utf-8"); + + Tcl_ExternalToUtfDString(utf8, [string UTF8String], -1, &ds); + Tcl_FreeEncoding(utf8); + } + result = proc(clientData, interp, Tcl_DStringValue(&ds)); + Tcl_DStringFree(&ds); } else { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "%s selection doesn't exist or form \"%s\" not defined", Tk_GetAtomName(tkwin, selection), Tk_GetAtomName(tkwin, target))); Index: macosx/tkMacOSXFont.c ================================================================== --- macosx/tkMacOSXFont.c +++ macosx/tkMacOSXFont.c @@ -117,10 +117,188 @@ ((faPtr)->slant == TK_FS_ITALIC ? NSItalicFontMask : NSUnitalicFontMask) /* *--------------------------------------------------------------------------- * + * NumUTF16Chars -- + * + * Like Tcl_NumUtfChars() but result is count of UTF16Chars, + * i.e. a surrogate pair counts as two UTF16Chars and not + * as a single entity. + * + * Results: + * As above. + * + * Side effects: + * None. 
+ * + *--------------------------------------------------------------------------- + */ + +#if TCL_UTF_MAX <= 4 + +/* No special code for BMP needed. */ +#define NumUTF16Chars Tcl_NumUtfChars + +#else + +static int +NumUTF16Chars( + const char *src, /* The UTF-8 string to measure. */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ +{ + Tcl_UniChar ch = 0; + int i = 0; + + if (length < 0) { + while (*src != '\0') { + src += Tcl_UtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + /* A surrogate pair in UTF16Char representation. */ + i++; + } + i++; + } + if (i < 0) { + i = INT_MAX; + } + } else { + const char *endPtr = src + length - 4; + + while (src < endPtr) { + src += Tcl_UtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + /* A surrogate pair in UTF16Char representation. */ + i++; + } + i++; + } + endPtr += 4; + while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { + src += Tcl_UtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + /* A surrogate pair in UTF16Char representation. */ + i++; + } + i++; + } + if (src < endPtr) { + i += endPtr - src; + } + } + return i; +} + +#endif + +/* + *--------------------------------------------------------------------------- + * + * UTF16CharAtIndex + * + * Like Tcl_UtfAtIndex() but counting in UTF16Char entities. + * Returns a pointer to the specified character position in + * the given UTF-8 string. + * + * Results: + * As above. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +#if TCL_UTF_MAX <= 4 + +/* No special code for BMP needed. */ +#define UTF16CharAtIndex Tcl_UtfAtIndex + +#else + +static const char * +UTF16CharAtIndex( + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ +{ + Tcl_UniChar ch = 0; + + while (index > 0) { + --index; + src += Tcl_UtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + /* A surrogate pair in UTF16Char representation. 
*/ + --index; + } + } + return src; +} + +#endif + +/* + *--------------------------------------------------------------------------- + * + * UtfToUTF16DString -- + * + * Convert an UTF-8 string to an UTF16Char string using the provided + * Tcl_DString as result buffer. Invalid data is silently replaced + * with "\uFFFD". The Tcl_DString is initialized by this function + * and must be free'd by the caller. + * + * Results: + * Pointer to UTF16Char string. + * + * Side effects: + * None. + * + *--------------------------------------------------------------------------- + */ + +static UTF16Char * +UtfToUTF16DString( + const char *src, /* Input UTF-8 string to be converted. + * Need not be '\0' terminated. */ + int numBytes, /* Maximum number of bytes to consider from + * source string in all. */ + Tcl_DString *dsPtr, /* Tcl_DString receiving the result. */ + int *lengthPtr) /* Number of UTF16Chars in result buffer. */ +{ + Tcl_UniChar ch = 0; + UTF16Char utf16; + const char *end; + + Tcl_DStringInit(dsPtr); + if (numBytes > 0) { + Tcl_DStringSetLength(dsPtr, numBytes * sizeof(utf16)); + Tcl_DStringSetLength(dsPtr, 0); + } + end = src + numBytes; + while (src < end) { + int len = Tcl_UtfToUniChar(src, &ch); + + utf16 = (UTF16Char)ch; +#if TCL_UTF_MAX > 4 + if (ch >= 0xD800 && ch <= 0xDFFF) { + utf16 = 0xFFFD; + } else if (ch > 0xFFFF) { + utf16 = (((ch - 0x10000) >> 10) & 0x3FF) | 0xD800; + Tcl_DStringAppend(dsPtr, (char *) &utf16, sizeof(utf16)); + utf16 = ((ch - 0x10000) & 0x3FF) | 0xDC00; + } +#endif + Tcl_DStringAppend(dsPtr, (char *) &utf16, sizeof(utf16)); + src += len; + } + *lengthPtr = Tcl_DStringLength(dsPtr) / sizeof(utf16); + return (UTF16Char *) Tcl_DStringValue(dsPtr); +} + +/* + *--------------------------------------------------------------------------- + * * GetTkFontAttributesForNSFont -- * * Fill in TkFontAttributes for given NSFont. 
* * Results: @@ -677,14 +855,22 @@ { MacFont *fontPtr = (MacFont *) tkfont; NSFont *nsFont = fontPtr->nsFont; *faPtr = fontPtr->font.fa; if (nsFont && ![[nsFont coveredCharacterSet] characterIsMember:c]) { - UTF16Char ch = (UTF16Char) c; + UTF16Char ch[2]; - nsFont = [nsFont bestMatchingFontForCharacters:&ch - length:1 attributes:nil actualCoveredLength:NULL]; + if (c > 0xFFFF) { + ch[0] = (((c - 0x10000) >> 10) & 0x3FF) | 0xD800; + ch[1] = ((c - 0x10000) & 0x3FF) | 0xDC00; + nsFont = [nsFont bestMatchingFontForCharacters:ch + length:2 attributes:nil actualCoveredLength:NULL]; + } else { + ch[0] = (UTF16Char) c; + nsFont = [nsFont bestMatchingFontForCharacters:ch + length:1 attributes:nil actualCoveredLength:NULL]; + } if (nsFont) { GetTkFontAttributesForNSFont(nsFont, faPtr); } } } @@ -809,11 +995,13 @@ CFRange range = {0, 0}; CTLineRef line; CGFloat offset = 0; CFIndex index; double width; - int length, fit; + int length, fit, utf16Len; + UTF16Char *utf16String; + Tcl_DString ds; if (rangeStart < 0 || rangeLength <= 0 || rangeStart + rangeLength > numBytes || (maxLength == 0 && !(flags & TK_AT_LEAST_ONE))) { *lengthPtr = 0; @@ -820,23 +1008,24 @@ return 0; } if (maxLength > 32767) { maxLength = 32767; } - string = [[NSString alloc] initWithBytesNoCopy:(void*)source - length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO]; + utf16String = UtfToUTF16DString(source, numBytes, &ds, &utf16Len); + string = [[NSString alloc] initWithCharactersNoCopy:(void*)utf16String + length:utf16Len freeWhenDone:NO]; if (!string) { length = 0; fit = rangeLength; goto done; } attributedString = [[NSAttributedString alloc] initWithString:string attributes:fontPtr->nsAttributes]; typesetter = CTTypesetterCreateWithAttributedString( (CFAttributedStringRef)attributedString); - start = Tcl_NumUtfChars(source, rangeStart); - len = Tcl_NumUtfChars(source + rangeStart, rangeLength); + start = NumUTF16Chars(source, rangeStart); + len = NumUTF16Chars(source, rangeStart + rangeLength); 
if (start > 0) { range.length = start; line = CTTypesetterCreateLine(typesetter, range); offset = CTLineGetTypographicBounds(line, NULL, NULL, NULL); CFRelease(line); @@ -899,12 +1088,13 @@ } CFRelease(typesetter); [attributedString release]; [string release]; length = ceil(width - offset); - fit = (Tcl_UtfAtIndex(source, index) - source) - rangeStart; + fit = (UTF16CharAtIndex(source, index) - source) - rangeStart; done: + Tcl_DStringFree(&ds); #ifdef TK_MAC_DEBUG_FONTS TkMacOSXDbgMsg("measure: source=\"%s\" range=\"%.*s\" maxLength=%d " "flags='%s%s%s%s' -> width=%d bytesFit=%d\n", source, rangeLength, source+rangeStart, maxLength, flags & TK_PARTIAL_OK ? "partialOk " : "", @@ -1059,20 +1249,24 @@ TkMacOSXDrawingContext drawingContext; CGContextRef context; CGColorRef fg; NSFont *nsFont; CGAffineTransform t; - int h; + int h, utf16Len; + Tcl_DString ds; + UTF16Char *utf16String; if (rangeStart < 0 || rangeLength <= 0 || rangeStart + rangeLength > numBytes || !TkMacOSXSetupDrawingContext(drawable, gc, 1, &drawingContext)) { return; } - string = [[NSString alloc] initWithBytesNoCopy:(void*)source - length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO]; + utf16String = UtfToUTF16DString(source, numBytes, &ds, &utf16Len); + string = [[NSString alloc] initWithCharactersNoCopy:(void*)utf16String + length:utf16Len freeWhenDone:NO]; if (!string) { + Tcl_DStringFree(&ds); return; } context = drawingContext.context; fg = TkMacOSXCreateCGColor(gc, gc->foreground); attributes = [fontPtr->nsAttributes mutableCopy]; @@ -1095,12 +1289,12 @@ t = CGAffineTransformTranslate(CGAffineTransformRotate( CGAffineTransformTranslate(t, x, y), angle*PI/180.0), -x, -y); } CGContextConcatCTM(context, t); CGContextSetTextPosition(context, x, y); - start = Tcl_NumUtfChars(source, rangeStart); - len = Tcl_NumUtfChars(source, rangeStart + rangeLength); + start = NumUTF16Chars(source, rangeStart); + len = NumUTF16Chars(source, rangeStart + rangeLength); if (start > 0) { CGRect clipRect = 
CGRectInfinite, startBounds; line = CTTypesetterCreateLine(typesetter, CFRangeMake(0, start)); startBounds = CTLineGetImageBounds(line, context); CFRelease(line); @@ -1112,10 +1306,11 @@ CFRelease(line); CFRelease(typesetter); [attributedString release]; [string release]; [attributes release]; + Tcl_DStringFree(&ds); TkMacOSXRestoreDrawingContext(&drawingContext); } #pragma mark - #pragma mark Accessors: Index: unix/tkUnixFont.c ================================================================== --- unix/tkUnixFont.c +++ unix/tkUnixFont.c @@ -519,11 +519,11 @@ srcStart = src; srcEnd = src + srcLen; dstStart = dst; - dstEnd = dst + dstLen - TCL_UTF_MAX; + dstEnd = dst + dstLen - 4; for (numChars = 0; src < srcEnd; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; @@ -594,11 +594,11 @@ srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; if (!(flags & TCL_ENCODING_END)) { - srcClose -= TCL_UTF_MAX; + srcClose -= 4; } dstStart = dst; dstEnd = dst + dstLen - 2 /* sizeof(UCS-2) */; @@ -2211,11 +2211,11 @@ SubFont *subFontPtr, /* Contains font mapping cache to be * updated. */ int row) /* Index of the page to be loaded into the * cache. */ { - char buf[16], src[TCL_UTF_MAX]; + char buf[16], src[4]; int minHi, maxHi, minLo, maxLo, scale, checkLo; int i, end, bitOffset, isTwoByteFont, n; Tcl_Encoding encoding; XFontStruct *fontStructPtr; XCharStruct *widths; @@ -2415,11 +2415,11 @@ Tk_Uid hateFoundry; const char *charset, *hateCharset; unsigned bestScore[2]; char **nameList; char **nameListOrig; - char src[TCL_UTF_MAX]; + char src[4]; FontAttributes want, got; Display *display; SubFont subFont; XFontStruct *fontStructPtr; Tcl_DString dsEncodings; Index: unix/tkUnixSelect.c ================================================================== --- unix/tkUnixSelect.c +++ unix/tkUnixSelect.c @@ -19,11 +19,11 @@ * means only the final zero-length transfer * still has to be done. Otherwise it is the * offset of the next chunk of data to * transfer. 
*/ Tcl_EncodingState state; /* The encoding state needed across chunks. */ - char buffer[TCL_UTF_MAX]; /* A buffer to hold part of a UTF character + char buffer[4]; /* A buffer to hold part of a UTF character * that is split across chunks.*/ } ConvertInfo; /* * When handling INCR-style selection retrievals, the selection owner uses the @@ -444,11 +444,11 @@ /* * Preserve any left-over bytes. */ - if (srcLen > TCL_UTF_MAX) { + if (srcLen > 4) { Tcl_Panic("selection conversion left too many bytes unconverted"); } memcpy(incrPtr->converts[i].buffer, src, (size_t) srcLen+1); Tcl_DStringFree(&ds); } else {