Attachment "word_encode-2.patch" to
ticket [763731ffff]
added by
andreas_kupries
2006-10-26 06:19:41.
--- modules/mime/mime.tcl.orig 2006-10-25 00:20:03.000000000 +0200
+++ modules/mime/mime.tcl 2006-10-25 08:44:02.000000000 +0200
@@ -2410,13 +2410,12 @@
}
append result $line\n
}
-
- # Trim off last \n, since the above code has the side-effect
- # of adding an extra \n to the encoded string and return the result.
-
- set result [string range $result 0 end-1]
+ # Trim off last \n, since the above code has the side-effect
+ # of adding an extra \n to the encoded string and return the result.
+ set result [string range $result 0 end-1]
}
+
# If the string ends in space or tab, replace with =xx
set lastChar [string index $result end]
@@ -3671,11 +3670,15 @@
# charset The character set to encode the message to.
# method The encoding method (base64 or quoted-printable).
# string The string to encode.
+# ?-charset_encoded 0 or 1 Whether the data is already encoded
+# in the specified charset (default 1)
+# ?-maxlength maxlength The maximum length of each encoded
+# word to return (default 66)
#
# Results:
# Returns a word encoded string.
-proc ::mime::word_encode {charset method string} {
+proc ::mime::word_encode {charset method string {args}} {
variable encodings
@@ -3691,13 +3694,83 @@
error "unknown method '$method', must be base64 or quoted-printable"
}
- set result "=?$encodings($charset)?"
+ # default to encoded and a length that won't make the Subject header to long
+ array set options [list -charset_encoded 1 -maxlength 66]
+ array set options $args
+
+ if { $options(-charset_encoded) } {
+ set unencoded_string [::encoding convertfrom $charset $string]
+ } else {
+ set unencoded_string $string
+ }
+
+ set string_length [string length $unencoded_string]
+ set string_bytelength [string bytelength $unencoded_string]
+
+ # the 7 is for =?, ?Q?, ?= delimiters of the encoded word
+ set maxlength [expr {$options(-maxlength) - [string length $encodings($charset)] - 7}]
switch -exact -- $method {
base64 {
- append result "B?[string trimright [base64 -mode encode -- $string] \n]?="
+ if { $maxlength < 4 } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ set count 0
+ set maxlength [expr {($maxlength / 4) * 3}]
+ while { $count < $string_length } {
+ set length 0
+ set enc_string ""
+ while { ($length < $maxlength) && ($count < $string_length) } {
+ set char [string range $unencoded_string $count $count]
+ set enc_char [::encoding convertto $charset $char]
+ if { ($length + [string length $enc_char]) > $maxlength } {
+ set length $maxlength
+ } else {
+ append enc_string $enc_char
+ incr count
+ incr length [string length $enc_char]
+ }
+ }
+ set encoded_word [base64 -mode encode -- $enc_string]
+ append result "=?$encodings($charset)?B?$encoded_word?=\n "
+ }
+ # Trim off last "\n ", since the above code has the side-effect
+ # of adding an extra "\n " to the encoded string.
+
+ set result [string range $result 0 end-2]
}
quoted-printable {
- append result "Q?[qp_encode $string 1]?="
+ if { $maxlength < 1 } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ set count 0
+ while { $count < $string_length } {
+ set length 0
+ set encoded_word ""
+ while { ($length < $maxlength) && ($count < $string_length) } {
+ set char [string range $unencoded_string $count $count]
+ set enc_char [::encoding convertto $charset $char]
+ set qp_enc_char [qp_encode $enc_char 1]
+ set qp_enc_char_length [string length $qp_enc_char]
+ if { $qp_enc_char_length > $maxlength } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ if { ($length + [string length $qp_enc_char]) > $maxlength } {
+ set length $maxlength
+ } else {
+ append encoded_word $qp_enc_char
+ incr count
+ incr length [string length $qp_enc_char]
+ }
+ }
+ append result "=?$encodings($charset)?Q?$encoded_word?=\n "
+ }
+ # Trim off last "\n ", since the above code has the side-effect
+ # of adding an extra "\n " to the encoded string.
+
+ set result [string range $result 0 end-2]
}
"" {
# Go ahead
@@ -3769,13 +3842,13 @@
# ::mime::field_decode --
#
# Word decodes strings that have been word encoded as per RFC 2047
-# and converts the string from UTF to the original encoding/charset.
+# and converts the string from the original encoding/charset to UTF.
#
# Arguments:
# field The string to decode
#
# Results:
-# Returns the decoded string in its original encoding/charset..
+# Returns the decoded string in UTF.
proc ::mime::field_decode {field} {
# ::mime::field_decode is broken. Here's a new version.
--- modules/mime/mime.test.orig 2006-01-31 05:16:45.000000000 +0100
+++ modules/mime/mime.test 2006-10-25 08:43:28.000000000 +0200
@@ -60,7 +60,6 @@
-header [list Content-Description "Test Multipart"] \
-parts [list $tok1 $tok2]]
set msg [mime::buildmessage $bigTok]
-
# The generated message is predictable except for the Content-ID
list [regexp "MIME-Version: 1.0\r
Content-Description: Test Multipart\r
@@ -266,6 +265,17 @@
} "Test de caract=E8res accentu=E9s : =E2 =EE =E9 =E7 et quelques contr=F4le=\ns =22=5B=7C=5D()=22"
+test mime-4.5 {Test qp_encode with softbreak} {
+ set str1 [string repeat abc 40]
+ mime::qp_encode $str1
+} "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabca=
+bcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+
+test mime-4.6 {Test qp_encode with softbreak} {
+ set str1 [string repeat abc 40]
+ mime::qp_encode $str1 0 1
+} [string repeat abc 40]
+
@@ -295,6 +305,132 @@
mime::word_decode "=?ISO-8859-1?b?VGVzdCBsb3dlcmNhc2UgYg==?="
} {iso8859-1 base64 {Test lowercase b}}
+test mime-5.7 {Test word_encode with quoted-printable method across encoded word boundaries} {
+ mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 31
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.8 {Test word_encode with quoted-printable method across encoded word boundaries} {
+ mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 32
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.9 {Test word_encode with quoted-printable method and multibyte character} {
+ mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF"
+} "=?EUC-JP?Q?Following_me_is_a_multibyte_character_=A4=CF?="
+
+set n 10
+while {$n < 14} {
+ test mime-5.$n {Test word_encode with quoted-printable method and multibyte character across encoded word boundary} {
+ mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF" -maxlength [expr 42 + $n]
+ } "=?EUC-JP?Q?Following_me_is_a_multibyte_character_?=
+ =?EUC-JP?Q?=A4=CF?="
+ incr n
+}
+
+test mime-5.14 {Test word_encode with quoted-printable method and multibyte character (triple)} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF"
+} "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+
+set n 15
+while {$n < 23} {
+ test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+ } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_?=
+ =?UTF-8?Q?=E3=81=AF?="
+ incr n
+}
+
+while {$n < 25} {
+ test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+ } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+ incr n
+}
+
+while {$n < 29} {
+ test mime-5.$n {Test word_encode with base64 method across encoded word boundaries} {
+ mime::word_encode euc-jp base64 "There is a multibyte character \xA4\xCF" -maxlength [expr 28 + $n]
+ } "=?EUC-JP?B?VGhlcmUgaXMgYSBtdWx0aWJ5dGUgY2hhcmFjdGVy?=
+ =?EUC-JP?B?IKTP?="
+ incr n
+}
+
+while {$n < 33} {
+ test mime-5.$n {Test word_encode with base64 method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 base64 "Here is a multibyte character \xE3\x81\xAF" -maxlength [expr 23 + $n]
+ } "=?UTF-8?B?SGVyZSBpcyBhIG11bHRpYnl0ZSBjaGFyYWN0ZXIg?=
+ =?UTF-8?B?44Gv?="
+ incr n
+}
+
+test mime-5.33 {Test word_encode with quoted-printable method and -maxlength set to same length as will the result} {
+ mime::word_encode iso8859-1 quoted-printable "123" -maxlength 20
+} "=?ISO-8859-1?Q?123?="
+
+test mime-5.34 {Test word_encode with base64 method and -maxlength set to same length as will the result} {
+ mime::word_encode iso8859-1 base64 "123" -maxlength 21
+} "=?ISO-8859-1?B?MTIz?="
+
+test mime-5.35 {Test word_encode with quoted-printable method and non charset encoded string} {
+ mime::word_encode utf-8 quoted-printable "\u306F" -charset_encoded 0
+} "=?UTF-8?Q?=E3=81=AF?="
+
+test mime-5.36 {Test word_encode with base64 method and non charset encoded string} {
+ mime::word_encode utf-8 base64 "\u306F" -charset_encoded 0
+} "=?UTF-8?B?44Gv?="
+
+test mime-5.36 {Test word_encode with base64 method and one byte} {
+ mime::word_encode iso8859-1 base64 "a"
+} "=?ISO-8859-1?B?YQ==?="
+
+test mime-5.37 {Test word_encode with base64 method and two bytes} {
+ mime::word_encode euc-jp base64 "\xA4\xCF"
+} "=?EUC-JP?B?pM8=?="
+
+test mime-5.38 {Test word_encode with unknown charset} {
+ catch {mime::word_encode scribble quoted-printable "scribble is an unknown charset"} errmsg
+ set errmsg
+} "unknown charset 'scribble'"
+
+test mime-5.39 {Test word_encode with invalid charset} {
+ catch {mime::word_encode unicode quoted-printable "unicode is not a valid charset"} errmsg
+ set errmsg
+} "invalid charset 'unicode'"
+
+test mime-5.40 {Test word_encode with invalid method} {
+ catch {mime::word_encode iso8859-1 tea-leaf "tea-leaf is not a valid method"} errmsg
+ set errmsg
+} "unknown method 'tea-leaf', must be base64 or quoted-printable"
+
+test mime-5.41 {Test word_encode with maxlength to short for method quoted-printable} {
+ catch {mime::word_encode iso8859-1 quoted-printable "1" -maxlength 17} errmsg
+ set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.42 {Test word_encode with maxlength on the limit for quoted_printable and an unquoted character} {
+ catch {mime::word_encode iso8859-1 quoted-printable "_" -maxlength 18} errmsg
+ set errmsg
+} "=?ISO-8859-1?Q?_?="
+
+test mime-5.43 {Test word_encode with maxlength to short for method quoted_printable and a character to be quoted} {
+ catch {mime::word_encode iso8859-1 quoted-printable "=" -maxlength 18} errmsg
+ set errmsg
+} "maxlength 18 too short for chosen charset and encoding"
+
+
+test mime-5.44 {Test word_encode with maxlength to short for method quoted-printable and multibyte character} {
+ catch {mime::word_encode euc-jp quoted-printable "\xA4\xCF" -maxlength 17} errmsg
+ set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.45 {Test word_encode with maxlength to short for method base64} {
+ catch {mime::word_encode iso8859-1 base64 "1" -maxlength 20} errmsg
+ set errmsg
+} "maxlength 20 too short for chosen charset and encoding"
+
test mime-6.1 {Test field_decode (from RFC 2047, part 8)} {
mime::field_decode {=?US-ASCII?Q?Keith_Moore?= <[email protected]>}
} {Keith Moore <[email protected]>}