Tk Library Source Code

Artifact [4178346130]
Login

Artifact 4178346130222d08ffcdbe6826306c326b6a458d:

Attachment "mime-1961881-a.patch" to ticket [1961881fff] added by andreas_kupries 2008-05-15 03:53:25.
Index: modules/mime/ChangeLog
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/mime/ChangeLog,v
retrieving revision 1.96
diff -w -u -r1.96 ChangeLog
--- modules/mime/ChangeLog	5 Nov 2007 23:16:36 -0000	1.96
+++ modules/mime/ChangeLog	14 May 2008 20:51:16 -0000
@@ -1,3 +1,14 @@
+2008-05-13  Andreas Kupries  <[email protected]>
+
+	* mime.tcl (::mime::parsepart): [SF Tcllib Bug 1961881]. Accepted
+	  patch, and extended. Now handling malformed input without having
+	  to throw an eror, and without going into an infinite loop. See
+	  also [Bug 631314], and Changelog entries 2003-06-06,
+	  2003-06-25. The testcases mime-3.{7,8} are not redundant, but
+	  significantly different. 3.7 actually has a terminating
+	  boundary, but misses the starting one, causing non-recognition
+	  of any terminating one.
+
 2007-11-05  Andreas Kupries  <[email protected]>
 
 	* mime.tcl (::mime::parsepart): Fixed [SF Tcllib Bug 1825092],
Index: modules/mime/mime.tcl
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/mime/mime.tcl,v
retrieving revision 1.60
diff -w -u -r1.60 mime.tcl
--- modules/mime/mime.tcl	5 Nov 2007 23:11:50 -0000	1.60
+++ modules/mime/mime.tcl	14 May 2008 20:51:16 -0000
@@ -825,15 +825,37 @@
 
     if {$fileP} {
         set pos [tell $state(fd)]
+	# This variable is like 'start', for the reasons laid out
+	# below, in the other branch of this conditional.
+	set initialpos $pos
+    } else {
+	# This variable is like 'start', a list of lines in the
+	# part. This record is made even before we find a starting
+	# boundary and used if we run into the terminating boundary
+	# before a starting boundary was found. In that case the lines
+	# before the terminator as recorded by tracelines are seen as
+	# the part, or at least we attempt to parse them as a
+	# part. See the forceoctet and nochild flags later. We cannot
+	# use 'start' as that records lines only after the starting
+	# boundary was found.
+	set tracelines [list]
     }
 
     set inP 0
     set moreP 1
+    set forceoctet 0
     while {$moreP} {
         if {$fileP} {
             if {$pos > $last} {
-                 error "termination string missing in $state(content)"
+		# We have run over the end of the part per the outer
+		# information without finding a terminating boundary.
+		# We now fake the boundary and force the parser to
+		# give any new part coming of this a mime-type of
+		# application/octet-stream regardless of header
+		# information.
                  set line "--$boundary--"
+		set x [string length $line]
+		set forceoctet 1
             } else {
               if {[set x [gets $state(fd) line]] < 0} {
                   error "end-of-file encountered while parsing $state(content)"
@@ -865,47 +887,116 @@
              }
 
              continue
+        } else {
+	    lappend tracelines $line
         }
 
         if {!$inP} {
-            if {![string compare $line "--$boundary"]} {
+	    # Haven't seen the starting boundary yet. Check if the
+	    # current line contains this starting boundary.
+
+            if {[string equal $line "--$boundary"]} {
+		# Yes. Switch parser state to now search for the
+		# terminating boundary of the part and record where
+		# the part begins (or initialize the recorder for the
+		# lines in the part).
                 set inP 1
                 if {$fileP} {
                     set start $pos
                 } else {
 		    set start [list]
                 }
-            }
+		continue
+            } elseif {[string equal $line "--$boundary--"]} {
+		# We just saw a terminating boundary before we ever
+		# saw the starting boundary of a part. This forces us
+		# to stop parsing, we do this by forcing the parser
+		# into an accepting state. We will try to create a
+		# child part based on faked start position or recorded
+		# lines, or, if that fails, let the current part have
+		# no children.
+
+		# As an example note the test case mime-3.7 and the
+		# referenced file "badmail1.txt".
 
+                set inP 1
+                if {$fileP} {
+                    set start $initialpos
+                } else {
+		    set start $tracelines
+                }
+		set forceoctet 1
+		# Fall through. This brings to the creation of the new
+		# part instead of searching further and possible
+		# running over the end.
+	    } else {
             continue
         }
+	}
+
+	# Looking for the end of the current part. We accept both a
+	# terminating boundary and the starting boundary of the next
+	# part as the end of the current part.
 
         if {([set moreP [string compare $line "--$boundary--"]]) \
                 && ([string compare $line "--$boundary"])} {
+	    # The current part has not ended, so we record the line
+	    # if we are inside a part and doing string parsing.
             if {$inP && !$fileP} {
 		lappend start $line
             }
             continue
         }
+
+	# The current part has ended. We now determine the exact
+	# boundaries, create a mime part object for it and recursively
+	# parse it deeper as part of that action.
+
 	# FRINK: nocheck
         variable [set child $token-[incr state(cid)]]
 
         lappend state(parts) $child
 
+	set nochild 0
         if {$fileP} {
             if {[set count [expr {$pos-($start+$x+$crlf+1)}]] < 0} {
                 set count 0
             }
-
+	    if {$forceoctet} {
+		set ::errorInfo {}
+		if {[catch {
             mime::initializeaux $child \
                 -file $state(file) -root $state(root) \
                 -offset $start -count $count
-
+		}]} {
+		    set nochild 1
+		    set state(parts) [lrange $state(parts) 0 end-1]
+		}
+	    } else {
+		mime::initializeaux $child \
+		    -file $state(file) -root $state(root) \
+		    -offset $start -count $count
+	    }
             seek $state(fd) [set start $pos] start
         } else {
+	    if {$forceoctet} {
+		if {[catch {
 	    mime::initializeaux $child -lineslist $start
+		}]} {
+		    set nochild 1
+		    set state(parts) [lrange $state(parts) 0 end-1]
+		}
+	    } else {
+		mime::initializeaux $child -lineslist $start
+	    }
             set start ""
         }
+	if {$forceoctet && !$nochild} {
+	    variable $child
+	    upvar 0  $child childstate
+	    set childstate(content) application/octet-stream
+	}
+	set forceoctet 0
     }
 }
 
Index: modules/mime/mime.test
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/mime/mime.test,v
retrieving revision 1.29
diff -w -u -r1.29 mime.test
--- modules/mime/mime.test	5 Nov 2007 23:11:50 -0000	1.29
+++ modules/mime/mime.test	14 May 2008 20:51:16 -0000
@@ -169,20 +169,27 @@
 foo}"
 
 test mime-3.7 {Test mime with a bad email [SF Bug 631314 ]} {
-    catch {
 	set tok [mime::initialize -file \
 		[file join $tcltest::testsDirectory badmail1.txt]]
-    } msg ; #{}
-    set msg
-} {termination string missing in multipart/mixed}
+
+    set res {}
+    set ctok [lindex [mime::getproperty $tok parts] 0]
+    lappend res [dictsort [mime::getproperty $tok]]
+    lappend res [dictsort [mime::getproperty $ctok]]
+    mime::finalize $tok
+    string map [list $ctok CHILD] $res
+} {{content multipart/mixed encoding {} params {boundary ----------CSFNU9QKPGZL79} parts CHILD size 0} {content application/octet-stream encoding {} params {charset us-ascii} size 0}}
 
 test mime-3.8 {Test mime with another bad email [SF Bug 631314 ]} {
-    catch {
 	set tok [mime::initialize -file \
 		[file join $tcltest::testsDirectory badmail2.txt]]
-    } msg ; #{}
-    set msg
-} {termination string missing in multipart/related}
+    set res {}
+    set ctok [lindex [mime::getproperty $tok parts] 0]
+    lappend res [dictsort [mime::getproperty $tok]]
+    lappend res [dictsort [mime::getproperty $ctok]]
+    mime::finalize $tok
+    string map [list $ctok CHILD] $res
+} {{content multipart/related encoding {} params {boundary ----=_NextPart_000_0000_2CBA2CBA.150C56D2} parts CHILD size 659} {content application/octet-stream encoding base64 params {} size 659}}
 
 test mime-3.9 {Parse a MIME message with a charset encoded body and use getbody -decode to get it back} {
     set msg {MIME-Version: 1.0