Tcl Library Source Code

Check-in [9c454867a1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Tkt [daa83d2edf]: uri::urn - Fix the handling of characters represented by a true multi-byte utf-8 sequence, in both encoding and decoding, i.e. quote and unquote. Fixed original test cases as well, they were broken. Bumped version to 1.0.3
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9c454867a16559a3a282a4f2f9928788c46f6f0e
User & Date: andreask 2014-09-02 20:37:06.020
Context
2014-10-08
03:50
Integration branch for new oauth module. Missing docs and tests. check-in: 7b3a12af1f user: aku tags: oauth
2014-09-21
12:40
Fix problem with detecting exceptions in solving linear programs check-in: 82424135be user: markus tags: trunk
2014-09-02
20:37
Tkt [daa83d2edf]: uri::urn - Fix the handling of characters represented by a true multi-byte utf-8 sequence, in both encoding and decoding, i.e. quote and unquote. Fixed original test cases as well, they were broken. Bumped version to 1.0.3 check-in: 9c454867a1 user: andreask tags: trunk
19:23
Tkt [7a623c098d]: valtype::iban - Updated IBAN to v50, version 1.5, courtesy of Max Jarek. check-in: 77f4958809 user: andreask tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to modules/uri/pkgIndex.tcl.
1
2
3
4
5
6
if {![package vsatisfies [package provide Tcl] 8.2]} {
    # FRINK: nocheck
    return
}
package ifneeded uri      1.2.4 [list source [file join $dir uri.tcl]]
package ifneeded uri::urn 1.0.2 [list source [file join $dir urn-scheme.tcl]]





|
1
2
3
4
5
6
if {![package vsatisfies [package provide Tcl] 8.2]} {
    # FRINK: nocheck
    return
}
package ifneeded uri      1.2.4 [list source [file join $dir uri.tcl]]
package ifneeded uri::urn 1.0.3 [list source [file join $dir urn-scheme.tcl]]
Changes to modules/uri/urn-scheme.man.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

[manpage_begin uri_urn n 1.1.2]
[keywords {rfc 2141}]
[keywords uri]
[keywords url]
[keywords urn]
[moddesc   {Tcl Uniform Resource Identifier Management}]
[titledesc {URI utilities, URN scheme}]
[category  Networking]
[require Tcl 8.2]
[require uri::urn [opt 1.1.2]]
[description]

This package provides two commands to quote and unquote the disallowed
characters for url using the [term urn] scheme, registers the scheme
with the package [package uri], and provides internal helpers which
will be automatically used by the commands [cmd uri::split] and
[cmd uri::join] of package [package uri] to handle urls using the
>
|








|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
[vset VERSION 1.0.3]
[manpage_begin uri_urn n [vset VERSION]]
[keywords {rfc 2141}]
[keywords uri]
[keywords url]
[keywords urn]
[moddesc   {Tcl Uniform Resource Identifier Management}]
[titledesc {URI utilities, URN scheme}]
[category  Networking]
[require Tcl 8.2]
[require uri::urn [opt [vset VERSION]]]
[description]

This package provides two commands to quote and unquote the disallowed
characters for url using the [term urn] scheme, registers the scheme
with the package [package uri], and provides internal helpers which
will be automatically used by the commands [cmd uri::split] and
[cmd uri::join] of package [package uri] to handle urls using the
Changes to modules/uri/urn-scheme.tcl.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# $Id: urn-scheme.tcl,v 1.11 2005/09/28 04:51:24 andreas_kupries Exp $
# -------------------------------------------------------------------------

package require uri      1.1.2

namespace eval ::uri {}
namespace eval ::uri::urn {
    variable version 1.0.2
}

# -------------------------------------------------------------------------

# Description:
#   Called by uri::split with a url to split into its parts.
#







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# $Id: urn-scheme.tcl,v 1.11 2005/09/28 04:51:24 andreas_kupries Exp $
# -------------------------------------------------------------------------

package require uri      1.1.2

namespace eval ::uri {}
namespace eval ::uri::urn {
    variable version 1.0.3
}

# -------------------------------------------------------------------------

# Description:
#   Called by uri::split with a url to split into its parts.
#
59
60
61
62
63
64
65

66
67
68
69
70







71
72
73
74
75
76
77
proc ::uri::urn::quote {url} {
    variable trans
    
    set ndx 0
    set result ""
    while {[regexp -indices -- "\[^$trans\]" $url r]} {
        set ndx [lindex $r 0]

        scan [string index $url $ndx] %c chr
        set rep %[format %.2X $chr]
        if {[string match $rep %00]} {
            error "invalid character: character $chr is not allowed"
        }







        
        incr ndx -1
        append result [string range $url 0 $ndx] $rep
        incr ndx 2
        set url [string range $url $ndx end]
    }
    append result $url







>
|
<
|


>
>
>
>
>
>
>







59
60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
proc ::uri::urn::quote {url} {
    variable trans
    
    set ndx 0
    set result ""
    while {[regexp -indices -- "\[^$trans\]" $url r]} {
        set ndx [lindex $r 0]

        set ch [string index $url $ndx]

        if {$ch eq "\0"} {
            error "invalid character: character $chr is not allowed"
        }

        # Decode into UTF-8 bytes.
        set rep {}
        foreach ch [split [encoding convertto utf-8 $ch] {}] {
            scan $ch %c chr
            append rep %[format %.2X $chr]
        }
        
        incr ndx -1
        append result [string range $url 0 $ndx] $rep
        incr ndx 2
        set url [string range $url $ndx end]
    }
    append result $url
93
94
95
96
97
98
99

100
101
102
103
104
105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
            incr first $start ; # Make the indices relative to the true string.
            incr last  $start ; # I.e. undo the effect of the 'string range' on match results.
            append result [string range $url $start [expr {$first - 1}]]
            append result [format %c 0x[string range $url [incr first] $last]]
            set start [incr last]
        }
        append result [string range $url $start end]

        return $result
    }
} else {
    proc ::uri::urn::unquote {url} {
        set result ""
        set start 0
        while {[regexp -start $start -indices {%[0-9a-fA-F]{2}} $url match]} {
            foreach {first last} $match break
            append result [string range $url $start [expr {$first - 1}]]
            append result [format %c 0x[string range $url [incr first] $last]]
            set start [incr last]
        }
        append result [string range $url $start end]

        return $result
    }
}

# -------------------------------------------------------------------------

::uri::register {urn URN} {
	variable NIDpart {[a-zA-Z0-9][a-zA-Z0-9-]{0,31}}







>
|












>
|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
            incr first $start ; # Make the indices relative to the true string.
            incr last  $start ; # I.e. undo the effect of the 'string range' on match results.
            append result [string range $url $start [expr {$first - 1}]]
            append result [format %c 0x[string range $url [incr first] $last]]
            set start [incr last]
        }
        append result [string range $url $start end]
        # Recode the array of utf-8 bytes to the proper internal rep.
        return [encoding convertfrom utf-8 $result]
    }
} else {
    proc ::uri::urn::unquote {url} {
        set result ""
        set start 0
        while {[regexp -start $start -indices {%[0-9a-fA-F]{2}} $url match]} {
            foreach {first last} $match break
            append result [string range $url $start [expr {$first - 1}]]
            append result [format %c 0x[string range $url [incr first] $last]]
            set start [incr last]
        }
        append result [string range $url $start end]
        # Recode the array of utf-8 bytes to the proper internal rep.
        return [encoding convertfrom utf-8 $result]
    }
}

# -------------------------------------------------------------------------

::uri::register {urn URN} {
	variable NIDpart {[a-zA-Z0-9][a-zA-Z0-9-]{0,31}}
Changes to modules/uri/urn.test.
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149


150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
    if {[catch {uri::join scheme urn nid $nid nss $nss} result]} {
        set result ok
    }
    set result
} {ok}

# -------------------------------------------------------------------------
# Quoting checks - various UTF-8 representations for 'coffee' (RFC 2324)
#
set data \
    [list \
         "coffee"                           "coffee" \
         "\x4B\x61\x66\x66\x65\x65"         "Kaffee" \
         "\x71\xC3\xA6\x68\x76\xC3\xA6"     "q%C3%A6hv%C3%A6" \
         "\xD9\x82\xD9\x87\xD9\x88\xD8\xA9" "%D9%82%D9%87%D9%88%D8%A9" \
         "\xCE\xBA\xCE\xB1\xCF\x86\xCE\xAD" "%CE%BA%CE%B1%CF%86%CE%AD" \
         "\xE0\xA4\x95\xE0\xA5\x8C\xE0\xA4\xAB\xE0\xA5\x80" \
                                "%E0%A4%95%E0%A5%8C%E0%A4%AB%E0%A5%80" \
        ]



set n 0
foreach {utf8 quoted} $data {
    test urn-4.[incr n] [list quote utf8 string] {
        list [catch {uri::urn::quote $utf8} msg] $msg
    } [list 0 $quoted]
}

set n 0
foreach {utf8 quoted} $data {
    test urn-5.[incr n] [list unquote utf8 string] {
        list [catch {uri::urn::unquote $quoted} msg] $msg
    } [list 0 $utf8]
}

# -------------------------------------------------------------------------
# Clean up the tests


testsuiteCleanup
return

# Local variables:
#    mode: tcl
#    indent-tabs-mode: nil
# End:







|
|
|
<
|
|
|
|
|
<
|
|
>
>


















>







131
132
133
134
135
136
137
138
139
140

141
142
143
144
145

146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
    if {[catch {uri::join scheme urn nid $nid nss $nss} result]} {
        set result ok
    }
    set result
} {ok}

# -------------------------------------------------------------------------
# Quoting checks - various UTF-8 representations for 'coffee' (RFC 2324, section 3)

catch { unset data }

lappend data  "coffee"                   "coffee"
lappend data  "\x4B\x61\x66\x66\x65\x65" "Kaffee"
lappend data  "q\u00e6hv\u00e6"          "q%C3%A6hv%C3%A6"                      ;# aserbaidjani
lappend data  "\u0642\u0647\u0648\u0629" "%D9%82%D9%87%D9%88%D8%A9"             ;# arabic
lappend data  "\u03ba\u03b1\u03c6\u03ad" "%CE%BA%CE%B1%CF%86%CE%AD"             ;# greek

lappend data  "\u0915\u094c\u092b\u0940" "%E0%A4%95%E0%A5%8C%E0%A4%AB%E0%A5%80" ;# hindi

# Ticket [daa83d2edf].
lappend data  "\u4f60\u597d"             "%E4%BD%A0%E5%A5%BD" ;# chinese 'How are you?'

set n 0
foreach {utf8 quoted} $data {
    test urn-4.[incr n] [list quote utf8 string] {
        list [catch {uri::urn::quote $utf8} msg] $msg
    } [list 0 $quoted]
}

set n 0
foreach {utf8 quoted} $data {
    test urn-5.[incr n] [list unquote utf8 string] {
        list [catch {uri::urn::unquote $quoted} msg] $msg
    } [list 0 $utf8]
}

# -------------------------------------------------------------------------
# Clean up the tests

unset data
testsuiteCleanup
return

# Local variables:
#    mode: tcl
#    indent-tabs-mode: nil
# End: