Tk Library Source Code: Artifact [b5ebdef7ab]

Artifact b5ebdef7abdd5814fe3780f580309b749820c447:

Attachment "word_encode-2.patch" to ticket [763731ffff] added by andreas_kupries 2006-10-26 06:19:41.
--- modules/mime/mime.tcl.orig	2006-10-25 00:20:03.000000000 +0200
+++ modules/mime/mime.tcl	2006-10-25 08:44:02.000000000 +0200
@@ -2410,13 +2410,12 @@
 	    }
 	    append result $line\n
 	}
-
     
-    # Trim off last \n, since the above code has the side-effect
-    # of adding an extra \n to the encoded string and return the result.
-
-    set result [string range $result 0 end-1]
+        # Trim off last \n, since the above code has the side-effect
+        # of adding an extra \n to the encoded string and return the result.
+        set result [string range $result 0 end-1]
     }
+
     # If the string ends in space or tab, replace with =xx
 
     set lastChar [string index $result end]
@@ -3671,11 +3670,15 @@
 #       charset   The character set to encode the message to.
 #       method    The encoding method (base64 or quoted-printable).
 #       string    The string to encode.
+#       ?-charset_encoded   0 or 1      Whether the data is already encoded
+#                                       in the specified charset (default 1)
+#       ?-maxlength         maxlength   The maximum length of each encoded
+#                                       word to return (default 66)
 #
 # Results:
 #	Returns a word encoded string.
 
-proc ::mime::word_encode {charset method string} {
+proc ::mime::word_encode {charset method string {args}} {
 
     variable encodings
 
@@ -3691,13 +3694,83 @@
 	error "unknown method '$method', must be base64 or quoted-printable"
     }
 
-    set result "=?$encodings($charset)?"
+    # default to encoded and a length that won't make the Subject header to long
+    array set options [list -charset_encoded 1 -maxlength 66]
+    array set options $args
+
+    if { $options(-charset_encoded) } {
+    	set unencoded_string [::encoding convertfrom $charset $string]
+    } else {
+        set unencoded_string $string
+    }
+
+    set string_length [string length $unencoded_string]
+    set string_bytelength [string bytelength $unencoded_string]
+
+    # the 7 is for =?, ?Q?, ?= delimiters of the encoded word
+    set maxlength [expr {$options(-maxlength) - [string length $encodings($charset)] - 7}]
     switch -exact -- $method {
 	base64 {
-	    append result "B?[string trimright [base64 -mode encode -- $string] \n]?="
+            if { $maxlength < 4 } {
+                error "maxlength $options(-maxlength) too short for chosen\
+                    charset and encoding"
+            }
+            set count 0
+            set maxlength [expr {($maxlength / 4) * 3}]
+            while { $count < $string_length } {
+                set length 0
+                set enc_string ""
+                while { ($length < $maxlength) && ($count < $string_length) } {
+                    set char [string range $unencoded_string $count $count]
+                    set enc_char [::encoding convertto $charset $char]
+                    if { ($length + [string length $enc_char]) > $maxlength } {
+                        set length $maxlength
+                    } else {
+                        append enc_string $enc_char
+                        incr count
+                        incr length [string length $enc_char]
+                    }
+                }
+                set encoded_word [base64 -mode encode -- $enc_string]
+                append result "=?$encodings($charset)?B?$encoded_word?=\n "
+            }
+            # Trim off last "\n ", since the above code has the side-effect
+            # of adding an extra "\n " to the encoded string.
+
+            set result [string range $result 0 end-2]
 	}
 	quoted-printable {
-	    append result "Q?[qp_encode $string 1]?="
+            if { $maxlength < 1 } {
+                error "maxlength $options(-maxlength) too short for chosen\
+                    charset and encoding"
+            }
+            set count 0
+            while { $count < $string_length } {
+            set length 0
+            set encoded_word ""
+            while { ($length < $maxlength) && ($count < $string_length) } {
+                set char [string range $unencoded_string $count $count]
+                set enc_char [::encoding convertto $charset $char]
+                set qp_enc_char [qp_encode $enc_char 1]
+                set qp_enc_char_length [string length $qp_enc_char]
+                if { $qp_enc_char_length > $maxlength } {
+                    error "maxlength $options(-maxlength) too short for chosen\
+                        charset and encoding"
+                }
+		if { ($length + [string length $qp_enc_char]) > $maxlength } {
+                    set length $maxlength
+                } else {
+                    append encoded_word $qp_enc_char
+                    incr count
+                    incr length [string length $qp_enc_char]
+                }
+            }
+	    append result "=?$encodings($charset)?Q?$encoded_word?=\n "
+            }
+            # Trim off last "\n ", since the above code has the side-effect
+            # of adding an extra "\n " to the encoded string.
+
+            set result [string range $result 0 end-2]
 	}
 	"" {
 	    # Go ahead
@@ -3769,13 +3842,13 @@
 # ::mime::field_decode --
 #
 #    Word decodes strings that have been word encoded as per RFC 2047
-#    and converts the string from UTF to the original encoding/charset.
+#    and converts the string from the original encoding/charset to UTF.
 #
 # Arguments:
 #       field     The string to decode
 #
 # Results:
-#	Returns the decoded string in its original encoding/charset..
+#	Returns the decoded string in UTF.
 
 proc ::mime::field_decode {field} {
     # ::mime::field_decode is broken.  Here's a new version.
--- modules/mime/mime.test.orig	2006-01-31 05:16:45.000000000 +0100
+++ modules/mime/mime.test	2006-10-25 08:43:28.000000000 +0200
@@ -60,7 +60,6 @@
 	    -header [list Content-Description "Test Multipart"] \
 	    -parts [list $tok1 $tok2]]
     set msg [mime::buildmessage $bigTok]
-
     # The generated message is predictable except for the Content-ID
     list [regexp "MIME-Version: 1.0\r
 Content-Description: Test Multipart\r
@@ -266,6 +265,17 @@
 } "Test de caract=E8res accentu=E9s : =E2 =EE =E9 =E7 et quelques contr=F4le=\ns =22=5B=7C=5D()=22"
 
 
+test mime-4.5 {Test qp_encode with softbreak} {
+    set str1 [string repeat abc 40]
+    mime::qp_encode $str1
+} "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabca=
+bcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+
+test mime-4.6 {Test qp_encode with softbreak} {
+    set str1 [string repeat abc 40]
+    mime::qp_encode $str1 0 1
+} [string repeat abc 40]
+
 
 
 
@@ -295,6 +305,132 @@
 	mime::word_decode "=?ISO-8859-1?b?VGVzdCBsb3dlcmNhc2UgYg==?="
 } {iso8859-1 base64 {Test lowercase b}}
 
+test mime-5.7 {Test word_encode with quoted-printable method across encoded word boundaries} {
+    mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 31
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.8 {Test word_encode with quoted-printable method across encoded word boundaries} {
+    mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 32
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.9 {Test word_encode with quoted-printable method and multibyte character} {
+    mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF"
+} "=?EUC-JP?Q?Following_me_is_a_multibyte_character_=A4=CF?="
+
+set n 10
+while {$n < 14} {
+    test mime-5.$n {Test word_encode with quoted-printable method and multibyte character across encoded word boundary} {
+        mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF" -maxlength [expr 42 + $n]
+    } "=?EUC-JP?Q?Following_me_is_a_multibyte_character_?=
+ =?EUC-JP?Q?=A4=CF?="
+    incr n
+}
+
+test mime-5.14 {Test word_encode with quoted-printable method and multibyte character (triple)} {
+    mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF"
+} "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+
+set n 15
+while {$n < 23} {
+    test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+        mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+    } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_?=
+ =?UTF-8?Q?=E3=81=AF?="
+    incr n
+}
+
+while {$n < 25} {
+    test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+        mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+    } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+    incr n
+}
+
+while {$n < 29} {
+    test mime-5.$n {Test word_encode with base64 method across encoded word boundaries} {
+        mime::word_encode euc-jp base64 "There is a multibyte character \xA4\xCF" -maxlength [expr 28 + $n]
+    } "=?EUC-JP?B?VGhlcmUgaXMgYSBtdWx0aWJ5dGUgY2hhcmFjdGVy?=
+ =?EUC-JP?B?IKTP?="
+    incr n
+}
+
+while {$n < 33} {
+    test mime-5.$n {Test word_encode with base64 method and triple byte character across encoded word boundary} {
+        mime::word_encode utf-8 base64 "Here is a multibyte character \xE3\x81\xAF" -maxlength [expr 23 + $n]
+    } "=?UTF-8?B?SGVyZSBpcyBhIG11bHRpYnl0ZSBjaGFyYWN0ZXIg?=
+ =?UTF-8?B?44Gv?="
+    incr n
+}
+
+test mime-5.33 {Test word_encode with quoted-printable method and -maxlength set to same length as will the result} {
+    mime::word_encode iso8859-1 quoted-printable "123" -maxlength 20
+} "=?ISO-8859-1?Q?123?="
+
+test mime-5.34 {Test word_encode with base64 method and -maxlength set to same length as will the result} {
+    mime::word_encode iso8859-1 base64 "123" -maxlength 21
+} "=?ISO-8859-1?B?MTIz?="
+
+test mime-5.35 {Test word_encode with quoted-printable method and non charset encoded string} {
+    mime::word_encode utf-8 quoted-printable "\u306F" -charset_encoded 0
+} "=?UTF-8?Q?=E3=81=AF?="
+
+test mime-5.36 {Test word_encode with base64 method and non charset encoded string} {
+    mime::word_encode utf-8 base64 "\u306F" -charset_encoded 0
+} "=?UTF-8?B?44Gv?="
+
+test mime-5.36 {Test word_encode with base64 method and one byte} {
+    mime::word_encode iso8859-1 base64 "a"
+} "=?ISO-8859-1?B?YQ==?="
+
+test mime-5.37 {Test word_encode with base64 method and two bytes} {
+    mime::word_encode euc-jp base64 "\xA4\xCF"
+} "=?EUC-JP?B?pM8=?="
+
+test mime-5.38 {Test word_encode with unknown charset} {
+    catch {mime::word_encode scribble  quoted-printable "scribble is an unknown charset"} errmsg
+    set errmsg
+} "unknown charset 'scribble'"
+
+test mime-5.39 {Test word_encode with invalid charset} {
+    catch {mime::word_encode unicode quoted-printable "unicode is not a valid charset"} errmsg
+    set errmsg
+} "invalid charset 'unicode'"
+
+test mime-5.40 {Test word_encode with invalid method} {
+    catch {mime::word_encode iso8859-1 tea-leaf "tea-leaf is not a valid method"} errmsg
+    set errmsg
+} "unknown method 'tea-leaf', must be base64 or quoted-printable"
+
+test mime-5.41 {Test word_encode with maxlength to short for method quoted-printable} {
+    catch {mime::word_encode iso8859-1 quoted-printable "1" -maxlength 17} errmsg
+    set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.42 {Test word_encode with maxlength on the limit for quoted_printable and an unquoted character} {
+   catch {mime::word_encode iso8859-1 quoted-printable "_" -maxlength 18} errmsg
+   set errmsg
+} "=?ISO-8859-1?Q?_?="
+
+test mime-5.43 {Test word_encode with maxlength to short for method quoted_printable and a character to be quoted} {
+   catch {mime::word_encode iso8859-1 quoted-printable "=" -maxlength 18} errmsg
+   set errmsg
+} "maxlength 18 too short for chosen charset and encoding"
+
+
+test mime-5.44 {Test word_encode with maxlength to short for method quoted-printable and multibyte character} {
+    catch {mime::word_encode euc-jp quoted-printable "\xA4\xCF" -maxlength 17} errmsg
+    set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.45 {Test word_encode with maxlength to short for method base64} {
+    catch {mime::word_encode iso8859-1 base64 "1" -maxlength 20} errmsg
+    set errmsg
+} "maxlength 20 too short for chosen charset and encoding"
+
 test mime-6.1 {Test field_decode (from RFC 2047, part 8)} {
     mime::field_decode {=?US-ASCII?Q?Keith_Moore?= <[email protected]>}
 } {Keith Moore <[email protected]>}