# -*- tcl -*-
# ### ### ### ######### ######### #########
## Copyright (c) 2008-2012 ActiveState Software Inc., Andreas Kupries
## 2016-2022 Andreas Kupries
## BSD License
##
# Package providing commands for the decoding of basic zip-file
# structures.
package require Tcl 8.5 9
package require fileutil::decode 0.2.1 ; # Framework for easy decoding of files.
namespace eval ::zipfile::decode {}
if {[package vcompare $tcl_patchLevel "8.6"] < 0} {
# Only needed pre-8.6
package require Trf ; # Wrapper to zlib
package require zlibtcl ; # Zlib usage. No commands, access through Trf
set ::zipfile::decode::native_zip_functs 0
} else {
set ::zipfile::decode::native_zip_functs 1
}
namespace eval ::zipfile::decode {
namespace import ::fileutil::decode::*
}
# ### ### ### ######### ######### #########
## Convenience command, decode and copy to dir
proc ::zipfile::decode::unzipfile {in out} {
zipfile::decode::open $in
set zd [zipfile::decode::archive]
zipfile::decode::unzip $zd $out
zipfile::decode::close
return
}
## Convenience command, decode and return list of contained paths.
proc ::zipfile::decode::content {in} {
zipfile::decode::open $in
set zd [zipfile::decode::archive]
set f [files $zd]
zipfile::decode::close
return $f
}
# ### ### ### ######### ######### #########
##
proc ::zipfile::decode::iszip {fname} {
if {[catch {
LocateEnd $fname
} msg]} {
return 0
}
return 1
}
proc ::zipfile::decode::open {fname} {
variable eoa
if {[catch {
set eoa [LocateEnd $fname]
} msg]} {
Error "\"$fname\" is not a zip file" BAD ARCHIVE
}
fileutil::decode::open $fname
return
}
proc ::zipfile::decode::close {} {
variable eoa
unset eoa
fileutil::decode::close
return
}
# ### ### ### ######### ######### #########
##
proc ::zipfile::decode::comment {zdict} {
array set _ $zdict
return $_(comment)
}
proc ::zipfile::decode::files {zdict} {
array set _ $zdict
array set f $_(files)
return [array names f]
}
proc ::zipfile::decode::filelocations zdict {
set res {}
foreach {fname finfo} [dict get $zdict files] {
set start [dict get $finfo fileloc]
set size [dict get $finfo csize]
lappend res $start $size $fname
}
set res [lsort -stride 3 -index 0 -integer $res[set res {}]]
return $res
}
proc ::zipfile::decode::hasfile {zdict fname} {
array set _ $zdict
array set f $_(files)
return [info exists f($fname)]
}
proc ::zipfile::decode::copyfile {zdict src dst} {
array set _ $zdict
array set f $_(files)
if {![info exists f($src)]} {
Error "File \"$src\" not known" BAD PATH
}
array set fd $f($src)
CopyFile $src fd $dst
return
}
proc ::zipfile::decode::getfile {zdict src} {
array set _ $zdict
array set f $_(files)
if {![info exists f($src)]} {
Error "File \"$src\" not known" BAD PATH
}
array set fd $f($src)
return [GetFile $src fd]
}
proc ::zipfile::decode::filesize {zdict src} {
array set _ $zdict
array set f $_(files)
if {![info exists f($src)]} {
Error "File \"$src\" not known" BAD PATH
}
array set fd $f($src)
return $fd(ucsize)
}
proc ::zipfile::decode::filecomment {zdict src} {
array set _ $zdict
array set f $_(files)
if {![info exists f($src)]} {
Error "File \"$src\" not known" BAD PATH
}
array set fd $f($src)
return $fd(comment)
}
proc ::zipfile::decode::unzip {zdict dst} {
array set _ $zdict
array set f $_(files)
foreach src [array names f] {
array set fd $f($src)
CopyFile $src fd [file join $dst $src]
unset fd
}
return
}
proc ::zipfile::decode::CopyFile {src fdv dst} {
upvar 1 $fdv fd
file mkdir [file dirname $dst]
if {[string match */ $src]} {
# Entry is a directory. Just create.
file mkdir $dst
return
}
# Create files. Empty files are a special case, we have
# nothing to decompress.
if {$fd(ucsize) == 0} {
::close [::open $dst w] ; # touch
return
}
# non-empty files, work depends on type of compression.
switch -exact -- $fd(cm) {
uncompressed {
go $fd(fileloc)
nbytes $fd(csize)
set out [::open $dst wb]
puts -nonewline $out [getval]
::close $out
}
deflate {
go $fd(fileloc)
nbytes $fd(csize)
set out [::open $dst wb]
if {$::zipfile::decode::native_zip_functs} {
puts -nonewline $out \
[zlib inflate [getval]]
} else {
puts -nonewline $out \
[zip -mode decompress -nowrap 1 -- \
[getval]]
}
::close $out
}
default {
Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \
BAD COMPRESSION
}
}
if {
($::tcl_platform(platform) ne "windows") &&
($fd(efattr) != 0)
} {
# On unix take the permissions encoded in the external
# attributes and apply them to the new file. If there are
# permission. A value of 0 indicates an older teabag where
# the encoder did not yet support permissions. These we do not
# change from the sustem defaults. Permissions are in the
# lower 9 bits of the MSW.
file attributes $dst -permissions \
[string map {0 --- 1 --x 2 -w- 3 -wx 4 r-- 5 r-x 6 rw- 7 rwx} \
[format %03o [expr {($fd(efattr) >> 16) & 0x1ff}]]]
}
# FUTURE: Run crc checksum on created file and compare to the
# ......: stored information.
return
}
proc ::zipfile::decode::GetFile {src fdv} {
# See also CopyFile for similar code.
# TODO: Check with CopyFile for refactoring opportunity
upvar 1 $fdv fd
# Entry is a directory.
if {[string match */ $src]} {return {}}
# Empty files are a special case, we have
# nothing to decompress.
if {$fd(ucsize) == 0} {return {}}
# non-empty files, work depends on type of compression.
switch -exact -- $fd(cm) {
uncompressed {
go $fd(fileloc)
nbytes $fd(csize)
return [getval]
}
deflate {
go $fd(fileloc)
nbytes $fd(csize)
if {$::zipfile::decode::native_zip_functs} {
return [zlib inflate [getval]]
} else {
return [zip -mode decompress -nowrap 1 -- [getval]]
}
}
default {
Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \
BAD COMPRESSION
}
}
# FUTURE: Run crc checksum on created file and compare to the
# ......: stored information.
return {}
}
# ### ### ### ######### ######### #########
##
proc ::zipfile::decode::tag {etag} {
mark
long-le
return [match 0x${etag}4b50] ; # 'PK x y', little-endian integer.
}
proc ::zipfile::decode::localfileheader {} {
clear
putloc @
if {![tag 0403]} {clear ; return 0}
short-le ; unsigned ; recode VER ; put vnte ; # version needed to extract
short-le ; unsigned ; put gpbf ; # general purpose bitflag
short-le ; unsigned ; recode CM ; put cm ; # compression method
short-le ; unsigned ; put lmft ; # last mod file time
short-le ; unsigned ; put lmfd ; # last mod file date
long-le ; unsigned ; put crc ; # crc32 | zero's here imply non-seekable,
long-le ; unsigned ; put csize ; # compressed file size | data is in a DDS behind the stored
long-le ; unsigned ; put ucsize ; # uncompressed file size | file.
short-le ; unsigned ; put fnamelen ; # file name length
short-le ; unsigned ; put efieldlen ; # extra field length
array set hdr [get]
clear
nbytes $hdr(fnamelen) ; put fname
putloc efieldloc
skip $hdr(efieldlen)
putloc fileloc
array set hdr [get]
clear
set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)]
setbuf [array get hdr]
return 1
}
proc ::zipfile::decode::centralfileheader {} {
clear
putloc @
if {![tag 0201]} {clear ; return 0}
# The items marked with ++ do not exist in the local file
# header. Everything else exists in the local file header as well,
# and has to match that information.
clear
short-le ; unsigned ; recode VER ; put vmb ; # ++ version made by
short-le ; unsigned ; recode VER ; put vnte ; # version needed to extract
short-le ; unsigned ; put gpbf ; # general purpose bitflag
short-le ; unsigned ; recode CM ; put cm ; # compression method
short-le ; unsigned ; put lmft ; # last mod file time
short-le ; unsigned ; put lmfd ; # last mod file date
long-le ; unsigned ; put crc ; # crc32 | zero's here imply non-seekable,
long-le ; unsigned ; put csize ; # compressed file size | data is in a DDS behind the stored
long-le ; unsigned ; put ucsize ; # uncompressed file size | file.
short-le ; unsigned ; put fnamelen ; # file name length
short-le ; unsigned ; put efieldlen2 ; # extra field length
short-le ; unsigned ; put fcommentlen ; # ++ file comment length
short-le ; unsigned ; put dns ; # ++ disk number start
short-le ; unsigned ; recode IFA ; put ifattr ; # ++ internal file attributes
long-le ; unsigned ; put efattr ; # ++ external file attributes
long-le ; unsigned ; put localloc ; # ++ relative offset of local file header
array set hdr [get]
clear
nbytes $hdr(fnamelen) ; put fname
putloc efieldloc2
skip $hdr(efieldlen2)
nbytes $hdr(fcommentlen) ; put comment
array set hdr [get]
clear
set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)]
setbuf [array get hdr]
return 1
}
## NOT USED
proc ::zipfile::decode::datadescriptor {} {
if {![tag 0807]} {return 0}
clear
long-le ; unsigned ; put crc ; # crc32
long-le ; unsigned ; put csize ; # compressed file size
long-le ; unsigned ; put ucsize ; # uncompressed file size
return 1
}
proc ::zipfile::decode::endcentralfiledir {} {
clear
putloc ecdloc
if {![tag 0605]} {clear ; return 0}
short-le ; unsigned ; put nd ; #
short-le ; unsigned ; put ndscd ; #
short-le ; unsigned ; put tnecdd ; #
short-le ; unsigned ; put tnecd ; #
long-le ; unsigned ; put sizecd ; #
long-le ; unsigned ; put ocd ; #
short-le ; unsigned ; put commentlen ; # archive comment length
array set hdr [get] ; clear
nbytes $hdr(commentlen) ; put comment
array set hdr [get] ; clear
setbuf [array get hdr]
return 1
}
## NOT USED
proc ::zipfile::decode::afile {} {
if {![localfileheader]} {return 0}
array set hdr [get]
if {($hdr(ucsize) == 0) || ($hdr(csize) > 0)} {
# The header entry specifies either
# 1. A zero-length file (possibly a directory entry), or
# 2. a non-empty file (compressed size > 0).
# In both cases we can skip the file contents directly.
# In both cases there should be no data descriptor behind
# we contents, but we check nevertheless. If there is its
# data overrides the current size and crc info.
skip $hdr(csize)
if {[datadescriptor]} {
array set hdr [get]
set hdr(ddpresent) 1
setbuf [array get hdr]
}
} else {
Error "Search data descriptor. Not Yet Implemented" INCOMPLETE
}
return 1
}
proc ::zipfile::decode::archive {} {
variable eoa
array set cb $eoa
# Position us at the beginning of CFH, using the data provided to
# us by 'LocateEnd', called during 'open'.
go [expr {$cb(base) + $cb(coff)}]
array set fn {}
set nentries 0
while {[centralfileheader]} {
array set _ [set data [get]] ; clear
#parray _
# Use the information found in the CFH entry to locate and
# read the associated LFH. We explicitly remember where we are
# in the file because mark/rewind is only one level and the
# LFH command already used that up.
set here [at]
go [expr {$cb(base) + $_(localloc)}]
if {![localfileheader]} {
ArchiveError "Directory entry without file." DIR WITHOUT FILE
}
array set lh [get] ; clear
go $here
# Compare the information in the CFH entry and associated
# LFH. Should match.
if {![hdrmatch lh _]} {
ArchiveError "File/Dir Header mismatch." HEADER MISMATCH FILE/DIR
}
# Merge local and central data.
array set lh $data
set fn($_(fname)) [array get lh]
unset lh _
incr nentries
}
if {![endcentralfiledir]} {
ArchiveError "Bad closure." BAD CLOSURE
}
array set _ [get] ; clear
#parray _
#puts \#$nentries
if {$nentries != $_(tnecd)} {
ArchiveError "\#Files ($_(tnecd)) does not match \#Actual files ($nentries)" \
MISMATCH COUNTS
}
set _(files) [array get fn]
return [array get _]
}
proc ::zipfile::decode::hdrmatch {lhv chv} {
upvar 1 $lhv lh $chv ch
#puts ______________________________________________
#parray lh
#parray ch
foreach key {
vnte gpbf cm lmft lmfd fnamelen fname
} {
if {$lh($key) != $ch($key)} {return 0}
}
if {[lsearch -exact $lh(gpbf) dd] < 0} {
# Compare the central and local size information only if the
# latter is not provided by a DDS. Which we haven't read.
# Because in that case the LFH information is uniformly 0, not
# known at the time of writing.
foreach key {
crc csize ucsize
} {
if {$lh($key) != $ch($key)} {return 0}
}
}
return 1
}
# ### ### ### ######### ######### #########
##
proc ::zipfile::decode::IFA {v} {
if {$v & 0x1} {
return text
} else {
return binary
}
}
# ### ### ### ######### ######### #########
##
namespace eval ::zipfile::decode {
variable vhost
array set vhost {
0 FAT 1 Amiga
2 VMS 3 Unix
4 VM/CMS 5 Atari
6 HPFS 7 Macintosh
8 Z-System 9 CP/M
10 TOPS-20 11 NTFS
12 SMS/QDOS 13 {Acorn RISC OS}
14 VFAT 15 MVS
16 BeOS 17 Tandem
}
}
proc ::zipfile::decode::VER {v} {
variable vhost
set u [expr {($v & 0xff00) >> 16}]
set l [expr {($v & 0x00ff)}]
set major [expr {$l / 10}]
set minor [expr {$l % 10}]
return [list $vhost($u) ${major}.$minor]
}
# ### ### ### ######### ######### #########
##
namespace eval ::zipfile::decode {
variable cm
array set cm {
0 uncompressed 1 shrink
2 {reduce 1} 3 {reduce 2}
4 {reduce 3} 5 {reduce 4}
6 implode 7 reserved
8 deflate 9 reserved
10 implode-pkware-dcl
}
}
proc ::zipfile::decode::CM {v} {
variable cm
return $cm($v)
}
# ### ### ### ######### ######### #########
##
namespace eval ::zipfile::decode {
variable gbits
array set gbits {
0,1 encrypted
1,0,implode 4k-window
1,1,implode 8k-window
2,0,implode 2fano
2,1,implode 3fano
3,1 dd
5,1 patched
deflate,0 normal
deflate,1 maximum
deflate,2 fast
deflate,3 superfast
}
}
proc ::zipfile::decode::GPBF {v cm} {
variable gbits
set res {}
if {$cm eq "deflate"} {
# bit 1, 2 are treated together for deflate
lappend res $gbits($cm,[expr {($v >> 1) & 0x3}])
}
set bit 0
while {$v > 0} {
set odd [expr {$v % 2 == 1}]
if {[info exists gbits($bit,$odd,$cm)]} {
lappend res $gbits($bit,$odd,$cm)
} elseif {[info exists gbits($bit,$odd)]} {
lappend res $gbits($bit,$odd)
}
set v [expr {$v >> 1}]
incr bit
}
return $res
}
# ### ### ### ######### ######### #########
proc ::zipfile::decode::ArchiveError {msg args} {
# Inlined "Error" -- Avoided eval/linsert dance
set code [linsert $args 0 ZIP DECODE BAD ARCHIVE]
return -code error -errorcode $code "Bad zip file. $msg"
}
proc ::zipfile::decode::Error {msg args} {
set code [linsert $args 0 ZIP DECODE]
return -code error -errorcode $code $msg
}
# ### ### ### ######### ######### #########
## Decode the zip file by locating its end (of the central file
## header). The higher levels will then use the information
## inside to locate and read the CFH. No scanning from the beginning
## This piece of code lifted from tclvs/library/zipvfs (v 1.0.3).
proc ::zipfile::decode::LocateEnd {path} {
if {[set code [catch {
LocateEndCore $path fd
} result]]} {
::close $fd
return -code $code $result
}
return $result
}
proc ::zipfile::decode::LocateEndCore {path fdv} {
upvar 1 $fdv fd
set fd [::open $path r]
fconfigure $fd -translation binary ;#-buffering none
array set cb {}
# [SF Tclvfs Bug 1003574]. Do not seek over beginning of file.
seek $fd 0 end
# Just looking in the last 512 bytes may be enough to handle zip
# archives without comments, however for archives which have
# comments the chunk may start at an arbitrary distance from the
# end of the file. So if we do not find the header immediately we
# have to extend the range of our search, possibly until we have a
# large part of the archive in memory. We can fail only after the
# whole file has been searched.
set sz [tell $fd]
set len 512
set at 512
while {1} {
if {$sz < $at} {set n -$sz} else {set n -$at}
seek $fd $n end
set hdr [read $fd $len]
# We are using 'string last' as we are searching the first
# from the end, which is the last from the beginning. See [SF
# Bug 2256740]. A zip archive stored in a zip archive can
# confuse the unmodified code, triggering on the magic
# sequence for the inner, uncompressed archive.
set pos [string last "PK\05\06" $hdr]
if {$pos == -1} {
if {$at >= $sz} {
ArchiveError "No header found" HEADER MISSING
}
# after the 1st iteration we force an overlap with last
# buffer to ensure that the pattern we look for is not
# split at a buffer boundary, nor the header itself
set len 540
incr at 512
} else {
break
}
}
set hdrlen [string length $hdr]
set hdr [string range $hdr [expr {$pos + 4}] [expr {$pos + 21}]]
set pos [expr {wide([tell $fd]) + $pos - $hdrlen}]
if {$pos < 0} {
set pos 0
}
binary scan $hdr ssssiis _ _ _ _ cb(csize) cb(coff) _
# Compute base for situations where ZIP file has been appended to
# another media (e.g. EXE). We can do this because
# (a) The expected location is stored in ECFH. (-> cb(coff))
# (b) We know the actual location of EFCH. (-> pos)
# (c) We know the size of CFH (-> cb(csize))
# (d) The CFH comes directly before the EFCH.
# (e) Items b...d provide us with the actual location of CFH, as (b)-(c).
# Thus the difference between (e) and (d) is the base in question.
set base [expr { $pos - $cb(csize) - $cb(coff) }]
if {$base < 0} {
set base 0
}
set cb(base) $base
if {$cb(coff) < 0} {
set cb(base) [expr {wide($cb(base)) - 4294967296}]
set cb(coff) [expr {wide($cb(coff)) + 4294967296}]
}
#--------------
::close $fd
return [array get cb]
}
# ### ### ### ######### ######### #########
## Ready
package provide zipfile::decode 0.10.1
return