Attachment "word_encode.patch" to
ticket [763731ffff]
added by
gunzel
2003-07-01 14:44:54.
? modules/mime/mime.tcl.patch
Index: modules/mime//mime.tcl
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/mime/mime.tcl,v
retrieving revision 1.33
diff -u -r1.33 mime.tcl
--- modules/mime//mime.tcl 25 Jun 2003 20:54:59 -0000 1.33
+++ modules/mime//mime.tcl 1 Jul 2003 06:42:54 -0000
@@ -3448,11 +3609,15 @@
# charset The character set to encode the message to.
# method The encoding method (base64 or quoted-printable).
# string The string to encode.
+# ?-charset_encoded 0 or 1 Whether the data is already encoded
+# in the specified charset (default 1)
+# ?-maxlength maxlength The maximum length of each encoded
+# word to return (default 66)
#
# Results:
# Returns a word encoded string.
-proc ::mime::word_encode {charset method string} {
+proc ::mime::word_encode {charset method string {args}} {
variable encodings
@@ -3468,13 +3633,85 @@
error "unknown method '$method', must be base64 or quoted-printable"
}
- set result "=?$encodings($charset)?"
+ # default to encoded and a length that won't make the Subject header to long
+ array set options [list -charset_encoded 1 -maxlength 66]
+ array set options $args
+
+ if { $options(-charset_encoded) } {
+ set unencoded_string [::encoding convertfrom $charset $string]
+ } else {
+ set unencoded_string $string
+ }
+
+ set string_length [string length $unencoded_string]
+ set string_bytelength [string bytelength $unencoded_string]
+
+ # the 7 is for =?, ?Q?, ?= delimiters of the encoded word
+ set maxlength [expr $options(-maxlength) - \
+ [string length $encodings($charset)] - 7]
+
switch -exact -- $method {
base64 {
- append result "B?[string trimright [base64 -mode encode -- $string] \n]?="
+ if { $maxlength < 4 } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ set count 0
+ set maxlength [expr ($maxlength / 4) * 3]
+ while { $count < $string_length } {
+ set length 0
+ set enc_string ""
+ while { ($length < $maxlength) && ($count < $string_length) } {
+ set char [string range $unencoded_string $count $count]
+ set enc_char [::encoding convertto $charset $char]
+ if { ($length + [string length $enc_char]) > $maxlength } {
+ set length $maxlength
+ } else {
+ append enc_string $enc_char
+ incr count
+ incr length [string length $enc_char]
+ }
+ }
+ set encoded_word [base64 -mode encode -- $enc_string]
+ append result "=?$encodings($charset)?B?$encoded_word?=\n "
+ }
+ # Trim off last "\n ", since the above code has the side-effect
+ # of adding an extra "\n " to the encoded string.
+
+ set result [string range $result 0 end-2]
}
quoted-printable {
- append result "Q?[qp_encode $string 1]?="
+ if { $maxlength < 1 } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ set count 0
+ while { $count < $string_length } {
+ set length 0
+ set encoded_word ""
+ while { ($length < $maxlength) && ($count < $string_length) } {
+ set char [string range $unencoded_string $count $count]
+ set enc_char [::encoding convertto $charset $char]
+ set qp_enc_char [qp_encode $enc_char 1]
+ set qp_enc_char_length [string length $qp_enc_char]
+ if { $qp_enc_char_length > $maxlength } {
+ error "maxlength $options(-maxlength) too short for chosen\
+ charset and encoding"
+ }
+ if { ($length + [string length $qp_enc_char]) > $maxlength } {
+ set length $maxlength
+ } else {
+ append encoded_word $qp_enc_char
+ incr count
+ incr length [string length $qp_enc_char]
+ }
+ }
+ append result "=?$encodings($charset)?Q?$encoded_word?=\n "
+ }
+ # Trim off last "\n ", since the above code has the side-effect
+ # of adding an extra "\n " to the encoded string.
+
+ set result [string range $result 0 end-2]
}
"" {
# Go ahead
@@ -3546,13 +3783,13 @@
# ::mime::field_decode --
#
# Word decodes strings that have been word encoded as per RFC 2047
-# and converts the string from UTF to the original encoding/charset.
+# and converts the string from the original encoding/charset to UTF.
#
# Arguments:
# field The string to decode
#
# Results:
-# Returns the decoded string in its original encoding/charset..
+# Returns the decoded string in UTF.
proc ::mime::field_decode {field} {
# ::mime::field_decode is broken. Here's a new version.
Index: modules/mime//mime.test
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/mime/mime.test,v
retrieving revision 1.9
diff -u -r1.9 mime.test
--- modules/mime//mime.test 25 Jun 2003 20:54:59 -0000 1.9
@@ -226,6 +299,126 @@
mime::word_decode "=?ISO-8859-1?b?VGVzdCBsb3dlcmNhc2UgYg==?="
} {iso8859-1 base64 {Test lowercase b}}
+test mime-5.7 {Test word_encode with quoted-printable method across encoded word boundaries} {
+ mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 31
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.8 {Test word_encode with quoted-printable method across encoded word boundaries} {
+ mime::word_encode iso8859-1 quoted-printable "Test de contrôle effectué" -maxlength 32
+} "=?ISO-8859-1?Q?Test_de_contr?=
+ =?ISO-8859-1?Q?=F4le_effectu?=
+ =?ISO-8859-1?Q?=E9?="
+
+test mime-5.9 {Test word_encode with quoted-printable method and multibyte character} {
+ mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF"
+} "=?EUC-JP?Q?Following_me_is_a_multibyte_character_=A4=CF?="
+
+set n 10
+while {$n < 14} {
+ test mime-5.$n {Test word_encode with quoted-printable method and multibyte character across encoded word boundary} {
+ mime::word_encode euc-jp quoted-printable "Following me is a multibyte character \xA4\xCF" -maxlength [expr 42 + $n]
+ } "=?EUC-JP?Q?Following_me_is_a_multibyte_character_?=
+ =?EUC-JP?Q?=A4=CF?="
+ incr n
+}
+
+test mime-5.14 {Test word_encode with quoted-printable method and multibyte character (triple)} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF"
+} "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+
+set n 15
+while {$n < 23} {
+ test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+ } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_?=
+ =?UTF-8?Q?=E3=81=AF?="
+ incr n
+}
+
+while {$n < 25} {
+ test mime-5.$n {Test word_encode with quoted-printable method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 quoted-printable "Here is a triple byte encoded character \xE3\x81\xAF" -maxlength [expr 38 + $n]
+ } "=?UTF-8?Q?Here_is_a_triple_byte_encoded_character_=E3=81=AF?="
+ incr n
+}
+
+while {$n < 29} {
+ test mime-5.$n {Test word_encode with base64 method across encoded word boundaries} {
+ mime::word_encode euc-jp base64 "There is a multibyte character \xA4\xCF" -maxlength [expr 28 + $n]
+ } "=?EUC-JP?B?VGhlcmUgaXMgYSBtdWx0aWJ5dGUgY2hhcmFjdGVy?=
+ =?EUC-JP?B?IKTP?="
+ incr n
+}
+
+while {$n < 33} {
+ test mime-5.$n {Test word_encode with base64 method and triple byte character across encoded word boundary} {
+ mime::word_encode utf-8 base64 "Here is a multibyte character \xE3\x81\xAF" -maxlength [expr 23 + $n]
+ } "=?UTF-8?B?SGVyZSBpcyBhIG11bHRpYnl0ZSBjaGFyYWN0ZXIg?=
+ =?UTF-8?B?44Gv?="
+ incr n
+}
+
+test mime-5.33 {Test word_encode with quoted-printable method and -maxlength set to same length as will the result} {
+ mime::word_encode iso8859-1 quoted-printable "123" -maxlength 20
+} "=?ISO-8859-1?Q?123?="
+
+test mime-5.34 {Test word_encode with base64 method and -maxlength set to same length as will the result} {
+ mime::word_encode iso8859-1 base64 "123" -maxlength 21
+} "=?ISO-8859-1?B?MTIz?="
+
+test mime-5.35 {Test word_encode with quoted-printable method and non charset encoded string} {
+ mime::word_encode utf-8 quoted-printable "\u306F" -charset_encoded 0
+} "=?UTF-8?Q?=E3=81=AF?="
+
+test mime-5.36 {Test word_encode with base64 method and non charset encoded string} {
+ mime::word_encode utf-8 base64 "\u306F" -charset_encoded 0
+} "=?UTF-8?B?44Gv?="
+
+test mime-5.36 {Test word_encode with base64 method and one byte} {
+ mime::word_encode iso8859-1 base64 "a"
+} "=?ISO-8859-1?B?YQ==?="
+
+test mime-5.37 {Test word_encode with base64 method and two bytes} {
+ mime::word_encode euc-jp base64 "\xA4\xCF"
+} "=?EUC-JP?B?pM8=?="
+
+test mime-5.38 {Test word_encode with unknown charset} {
+ catch {mime::word_encode scribble quoted-printable "scribble is an unknown charset"} errmsg
+ set errmsg
+} "unknown charset 'scribble'"
+
+test mime-5.39 {Test word_encode with invalid charset} {
+ catch {mime::word_encode unicode quoted-printable "unicode is not a valid charset"} errmsg
+ set errmsg
+} "invalid charset 'unicode'"
+
+test mime-5.40 {Test word_encode with invalid method} {
+ catch {mime::word_encode iso8859-1 tea-leaf "tea-leaf is not a valid method"} errmsg
+ set errmsg
+} "unknown method 'tea-leaf', must be base64 or quoted-printable"
+
+test mime-5.41 {Test word_encode with maxlength to short for method quoted-printable} {
+ catch {mime::word_encode iso8859-1 quoted-printable "1" -maxlength 17} errmsg
+ set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.42 {Test word_encode with maxlength to short for method quoted_printable and a charaacter to be quoted} {
+ catch {mime::word_encode iso8859-1 quoted-printable "_" -maxlength 18} errmsg
+ set errmsg
+} "maxlength 18 too short for chosen charset and encoding"
+
+test mime-5.43 {Test word_encode with maxlength to short for method quoted-printable and multibyte character} {
+ catch {mime::word_encode euc-jp quoted-printable "\xA4\xCF" -maxlength 17} errmsg
+ set errmsg
+} "maxlength 17 too short for chosen charset and encoding"
+
+test mime-5.44 {Test word_encode with maxlength to short for method base64} {
+ catch {mime::word_encode iso8859-1 base64 "1" -maxlength 20} errmsg
+ set errmsg
+} "maxlength 20 too short for chosen charset and encoding"
+
test mime-6.1 {Test field_decode (from RFC 2047, part 8)} {
mime::field_decode {=?US-ASCII?Q?Keith_Moore?= <[email protected]>}
} {Keith Moore <[email protected]>}