Tk Library Source Code

Artifact [5ec2bbc16c]
Login

Artifact 5ec2bbc16cd9e4c0b0d417c112ce9114f4ec5419:

Attachment "htmlparse.patch" to ticket [3553350fff] added by andreas_kupries 2012-08-03 00:47:54.
Index: htmlparse.tcl
===================================================================
RCS file: /cvsroot/tcllib/tcllib/modules/htmlparse/htmlparse.tcl,v
retrieving revision 1.28
diff -w -u -r1.28 htmlparse.tcl
--- htmlparse.tcl	10 Feb 2009 20:37:22 -0000	1.28
+++ htmlparse.tcl	2 Aug 2012 17:46:24 -0000
@@ -329,14 +329,23 @@
 	set incomplete ""
     }
 
-    # Convert the HTML string into a script.
+    # Convert the HTML string into a script. First look for tag
+    # patterns and convert them into command invocations. The command
+    # is actually a placeholder ((LF) NUL SOH @ NUL). See step 2 for
+    # the explanation.
 
-    set sub "\}\n$cmd {\\1} {} {\\2} \{\}\n$cmd {\\1} {/} {} \{"
+    set sub "\}\n\0\1@\0 {\\1} {} {\\2} \{\}\n\0\1@\0 {\\1} {/} {} \{"
     regsub -all -- {<([^\s>]+)\s*([^>]*)/>} $html $sub html
 
-    set sub "\}\n$cmd {\\2} {\\1} {\\3} \{"
+    set sub "\}\n\0\1@\0 {\\2} {\\1} {\\3} \{"
     regsub -all -- {<(/?)([^\s>]+)\s*([^>]*)>} $html $sub html
 
+    # Step 2: replace the command placeholder with the command itself.
+    # This way any characters in the command prefix which are special
+    # to regsub (such as & and backslashes) are never seen by regsub.
+
+    set html [string map [list \n\0\1@\0 \n$cmd] $html]
+
     # The value of queue now determines wether we process the HTML by
     # ourselves (queue is empty) or if we generate a list of  scripts
     # each of which processes n tags, n the argument to -split.
@@ -896,4 +905,4 @@
 
 # ### ######### ###########################
 
-package provide htmlparse 1.2
+package provide htmlparse 1.2.1