Attachment "htmlparse.patch" to
ticket [3553350fff]
added by
andreas_kupries
2012-08-03 00:47:54.
Index: htmlparse.tcl
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/htmlparse/htmlparse.tcl,v
retrieving revision 1.28
diff -w -u -r1.28 htmlparse.tcl
--- htmlparse.tcl 10 Feb 2009 20:37:22 -0000 1.28
+++ htmlparse.tcl 2 Aug 2012 17:46:24 -0000
@@ -329,14 +329,23 @@
set incomplete ""
}
- # Convert the HTML string into a script.
+ # Convert the HTML string into a script. First look for tag
+ # patterns and convert them into command invocations. The command
+ # is actually a placeholder ((LF) NUL SOH @ NUL). See step 2 for
+ # the explanation.
- set sub "\}\n$cmd {\\1} {} {\\2} \{\}\n$cmd {\\1} {/} {} \{"
+ set sub "\}\n\0\1@\0 {\\1} {} {\\2} \{\}\n\0\1@\0 {\\1} {/} {} \{"
regsub -all -- {<([^\s>]+)\s*([^>]*)/>} $html $sub html
- set sub "\}\n$cmd {\\2} {\\1} {\\3} \{"
+ set sub "\}\n\0\1@\0 {\\2} {\\1} {\\3} \{"
regsub -all -- {<(/?)([^\s>]+)\s*([^>]*)>} $html $sub html
+ # Step 2, replace the command placeholder with the command
+ # itself. This way any characters in the command prefix which are
+ # special to regsub (like & and backslashes) never reach regsub.
+
+ set html [string map [list \n\0\1@\0 \n$cmd] $html]
+
# The value of queue now determines wether we process the HTML by
# ourselves (queue is empty) or if we generate a list of scripts
# each of which processes n tags, n the argument to -split.
@@ -896,4 +905,4 @@
# ### ######### ###########################
-package provide htmlparse 1.2
+package provide htmlparse 1.2.1