Attachment "csvdiff" to
ticket [485717ffff]
added by
lvirden
2001-11-27 01:27:56.
#!/bin/sh
# use -*- tcl -*- \
exec tclsh "$0" "$@"
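# Perform a diff on two CSV files, comparing records by their key columns.
# The result is a CSV file written to stdout: records found only in
# file1 get a leading "-" column, records found only in file2 get a
# leading "+" column.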
package require csv
package require cmdline
# ----------------------------------------------------
# csvdiff ?-sep sepchar? ?-key LIST? file1 file2
#
# Argument processing and checks.
#
# -sep sepchar  Separator character handed to ::csv::split and
#               ::csv::join (default: ",").
# -key LIST     Comma-separated list of 0-based column specifications
#               which together select the key of a record (default:
#               "0-", i.e. every column). Each specification is one of
#                   n    the single column n
#                   -m   columns 0 up to and including m
#                   n-   column n up to the last column
#                   n-m  columns n up to and including m
#               For backward compatibility the specifications may also
#               be separated by whitespace.
#
# On any usage error the usage message is written to stderr and the
# script exits with status -1.

set sepChar ,
set usage "Usage: $argv0 ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
set keySpec "0-"

while {[set ok [cmdline::getopt argv {sep.arg key.arg} opt val]] > 0} {
    switch -exact -- $opt {
        sep {set sepChar $val}
        key {set keySpec $val}
    }
}

# ok < 0 signals an option error; exactly two file names must remain.
if {($ok < 0) || ([llength $argv] != 2)} {
    puts stderr $usage
    exit -1
}
foreach {inA inB} $argv break

# Translate the textual key specification into a list of {from to}
# index pairs suitable for lrange. The specification is split with
# [split] instead of being treated as a Tcl list, so that the
# documented comma-separated form works and malformed user input
# cannot raise a list-parsing error. All patterns are anchored so
# that anything which is not exactly one of the four documented forms
# is rejected here, not later as a "bad index" error.

set idx [list]
foreach i [split $keySpec ", \t\n"] {
    if {[string length $i] == 0} {
        # Artifact of adjacent separators (e.g. "0, 2"); skip it.
        continue
    }
    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
        lappend idx [list $f $t]
    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
        lappend idx [list $f end]
    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
        lappend idx [list 0 $t]
    } elseif {[regexp -- {^[0-9]+$} $i]} {
        lappend idx [list $i $i]
    } else {
        puts stderr $usage
        exit -1
    }
}

# An empty specification selects no key at all and is a usage error.
if {[llength $idx] == 0} {
    puts stderr $usage
    exit -1
}
set keySpec $idx

# From here on inA and inB are channels, no longer file names.
set inA [open $inA r]
set inB [open $inB r]
# ----------------------------------------------------
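# Actual processing. It uses the following information derived from
# the command line:
#
# inA     - channel open for reading file1 (input A)
# inB     - channel open for reading file2 (input B)
# sepChar - separator character
# keySpec - list of {from to} index pairs selecting the key columns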
# We read file2 completely and then go through the records of
# file1. For any record we don't find we write a "deleted" record. If
# we find the matching record we remove it from the internal
# storage. In a second sweep through the internal storage we write
# "added" records for the remaining data, as that was not in file1 but
# is in file2.
# keyof --
#
#	Compute the key of a single record.
#
# Arguments:
#	data	The record, as a list of field values.
#
# Results:
#	A list with the fields of 'data' selected by the global
#	'keySpec', which is read as a list of {from to} index pairs
#	passed to lrange. The selected fields are concatenated in the
#	order of the pairs.

proc keyof {data} {
    global keySpec

    set fields [list]
    foreach range $keySpec {
        foreach {first last} $range break
        # Append the selected fields one at a time; a range selecting
        # nothing contributes nothing.
        foreach field [lrange $data $first $last] {
            lappend fields $field
        }
    }
    return $fields
}
# Pass 1: load file2 (channel inB).
#
# order - all records of file2, in input order
# map   - array indexed by record key; an entry exists while the key
#         has not yet been seen in file1

set order [list]
array set map {}

while {[gets $inB line] >= 0} {
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]
    set map($key) .
    lappend order $data
}
close $inB

# Pass 2: walk file1 (channel inA). A record whose key is still in
# the map is considered matched and its key is removed; any other
# record is written as "deleted", i.e. with a leading "-" column.
#
# NOTE: because a matched key is removed, a later file1 record with
# the same key is reported as deleted.

while {[gets $inA line] >= 0} {
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]
    if {[info exists map($key)]} {
        unset map($key)
        continue
    }
    puts stdout [::csv::join [linsert $data 0 -] $sepChar]
}
# Release the channel; the original left it open until exit.
close $inA

# Pass 3: every file2 record whose key is still in the map was never
# matched by file1; write it as "added", i.e. with a leading "+"
# column, keeping the order of file2.

foreach data $order {
    if {[info exists map([keyof $data])]} {
        puts stdout [::csv::join [linsert $data 0 +] $sepChar]
    }
}
exit