Tcl Library Source Code

Check-in [265bbde03a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Significant changes to fumagic to bring it closer to feature-parity with file(1). Changed format of [filetype] result.
Timelines: family | ancestors | descendants | both | fumagic
Files: files | file ages | folders
SHA1: 265bbde03ac1e579cb206fa2d56075e65d63cb43
User & Date: pooryorick 2016-06-12 14:40:10
Context
2016-06-12
22:27
module fumagic {eliminate [procdef], replace with [generate]} Closed-Leaf check-in: b0158a7951 user: pooryorick tags: fumagic
14:50
(no comment) check-in: 404f28df65 user: pooryorick tags: trunk
14:40
Significant changes to fumagic to bring it closer to feature-parity with file(1). Changed format of [filetype] result. check-in: 265bbde03a user: pooryorick tags: fumagic
2016-06-09
20:26
Pulling fix for tool::main from the odie branch check-in: ad9ba43de8 user: hypnotoad tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to modules/fumagic/cfront.tcl.

1
2
3
4
5

6
7
8
9
10
11
12
# cfront.tcl --
#
#	Generator frontend for compiler of magic(5) files into recognizers
#	based on the 'rtcore'. Parses magic(5) into a basic 'script'.
#

# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: cfront.tcl,v 1.7 2008/03/22 01:10:32 andreas_kupries Exp $





>







1
2
3
4
5
6
7
8
9
10
11
12
13
# cfront.tcl --
#
#	Generator frontend for compiler of magic(5) files into recognizers
#	based on the 'rtcore'. Parses magic(5) into a basic 'script'.
#
# Copyright (c) 2016      Poor Yorick     <[email protected]>
# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: cfront.tcl,v 1.7 2008/03/22 01:10:32 andreas_kupries Exp $
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42
43
44
45
46


47

48













49




50



51

52














































53




54
55














56







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

86
87
88
89
90
91



92







93





















































































































































































































































































































































































































































94


95




96





97
98
99
100
101
102
103
104
105
106




107




108







109
110
111


112
113
114
115
116
117
118

119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146


147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332


333
334















335

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

357

358
359
360
361
362
363
364
365
366
367











368
369
370
371
372
373
374
package require Tcl 8.4

# file to compile the magic file from magic(5) into a tcl program
package require fileutil              ; # File processing (input)
package require fileutil::magic::cgen ; # Code generator.
package require fileutil::magic::rt   ; # Runtime (typemap)
package require struct::list          ; # lrepeat.


package provide fileutil::magic::cfront 1.0

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::cfront {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0



    # Constants















    variable hashprotection  [list "\#" "\\#" \" \\\" \{ \\\{ \} \\\}]      ;#"




    variable hashprotectionB [list "\#" "\\\#" \" \\\" \} \\\} ( \\( ) \\)] ;#"





    # Make backend functionality accessible














































    namespace import ::fileutil::magic::cgen::*





    namespace export compile procdef install














}








# parse an individual line
proc ::fileutil::magic::cfront::parseline {line {maxlevel 10000}} {
    # calculate the line's level
    set unlevel [string trimleft $line >]
    set level   [expr {[string length $line] - [string length $unlevel]}]
    if {$level > $maxlevel} {
   	return -code continue "Skip - too high a level"
    }

    # regexp parse line into (offset, type, value, command)
    set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel]
    if {$parse == {}} {
   	error "Can't parse: '$unlevel'"
    }

    # unpack parsed line
    set value   ""
    set command ""
    foreach {junk offset type value junk1 junk2 command} $parse break

    # handle trailing spaces
    if {[string index $value end] eq "\\"} {
   	append value " "
    }
    if {[string index $command end] eq "\\"} {
   	append command " "
    }


    if {$value eq ""} {
	# badly formatted line
   	return -code error "no value"
    }

    ::fileutil::magic::cfront::Debug {



   	puts "level:$level offset:$offset type:$type value:'$value' command:'$command'"







    }
























































































































































































































































































































































































































































    # return the line's fields




    return [list $level $offset $type $value $command]





}

# process a magic file
proc ::fileutil::magic::cfront::process {file {maxlevel 10000}} {
    variable hashprotection
    variable hashprotectionB
    variable level	;# level of line
    variable linenum	;# line number

    set level  0




    set script {}












    set linenum 0
    ::fileutil::foreachLine line $file {
   	incr linenum


   	set line [string trim $line " "]
   	if {[string index $line 0] eq "#"} {
   	    continue	;# skip comments
   	} elseif {$line == ""} {
   	    continue	;# skip blank lines
   	} else {
   	    # parse line

   	    if {[catch {parseline $line $maxlevel} parsed]} {
   		continue	;# skip erroring lines
   	    }

   	    # got a valid line
   	    foreach {level offset type value message} $parsed break

   	    # strip comparator out of value field,
   	    # (they are combined)
   	    set compare [string index $value 0]
   	    switch -glob --  $value {
   		[<>]=* {
   		    set compare [string range $value 0 1]
   		    set value   [string range $value 2 end]
   		}

   		<* - >* - &* - ^* {
   		    set value [string range $value 1 end]
   		}

   		=* {
   		    set compare "=="
   		    set value   [string range $value 1 end]
   		}

   		!* {
   		    set compare "!="
   		    set value   [string range $value 1 end]


   		}

   		x {
   		    # this is the 'don't care' match
   		    # used for collecting values
   		    set value ""
   		}

   		default {
   		    # the default comparator is equals
   		    set compare "=="
   		    if {[string match {\\[<!>=]*} $value]} {
   			set value [string range $value 1 end]
   		    }
   		}
   	    }

   	    # process type field
   	    set qual ""
   	    switch -glob -- $type {
   		pstring* - string* {
   		    # String or Pascal string type

   		    # extract string match qualifiers
		    foreach {type qual} [split $type /] break

   		    # convert pstring to string + qualifier
   		    if {$type eq "pstring"} {
   			append qual "p"
   			set type "string"
   		    }

   		    # protect hashes in output script value
   		    set value [string map $hashprotection $value]

   		    if {($value eq "\\0") && ($compare eq ">")} {
   			# record 'any string' match
   			set value   ""
   			set compare x
   		    } elseif {$compare eq "!="} {
   			# string doesn't allow !match
   			set value   !$value
   			set compare "=="
   		    }

   		    if {$type ne "string"} {
   			# don't let any odd string types sneak in
   			puts stderr "Reject String: ${file}:$linenum $type - $line"
   			continue
   		    }
   		}

   		regex {
   		    # I am *not* going to handle regex
   		    puts stderr "Reject Regex: ${file}:$linenum $type - $line"
   		    continue
   		}

   		*byte* - *short* - *long* - *date* {
   		    # Numeric types

   		    # extract numeric match &qualifiers
   		    set type [split  $type &]
   		    set qual [lindex $type 1]

   		    if {$qual ne ""} {
   			# this is an &-qualifier
   			set qual &$qual



   		    } else {
   			# extract -qualifier from type
   			set type [split  $type -]
   			set qual [lindex $type 1]
   			if {$qual ne ""} {
   			    set qual -$qual
   			}
   		    }
   		    set type [lindex $type 0]

   		    # perform value adjustments
   		    if {$compare ne "x"} {
   			# trim redundant Long value qualifier
   			set value [string trimright $value L]

   			if {[catch {set value [expr $value]} x]} {
			    upvar #0 errorInfo eo
   			    # check that value is representable in tcl
   			    puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo"
   			    continue;
   			}

   			# coerce numeric value into hex
   			set value [format "0x%x" $value]
   		    }
   		}

   		default {
   		    # this is not a type we can handle
   		    puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line"
   		    continue
   		}
   	    }
   	}

   	# collect some summaries
   	::fileutil::magic::cfront::Debug {
   	    variable types
   	    set types($type) $type
   	    variable quals
   	    set quals($qual) $qual
   	}

   	#puts $linenum level:$level offset:$offset type:$type
	#puts qual:$qual compare:$compare value:'$value' message:'$message'

   	# protect hashes in output script message
   	set message [string map $hashprotectionB $message]

   	if {![string match "(*)" $offset]} {
   	    catch {set offset [expr $offset]}
   	}

   	# record is the complete match command,
   	# encoded for tcl code generation
   	set record [list $linenum $type $qual $compare $offset $value $message]
   	if {$script == {}} {
   	    # the original script has level 0,
   	    # regardless of what the script says
   	    set level 0
   	}

   	if {$level == 0} {
   	    # add a new 0-level record
   	    lappend script $record
   	} else {
   	    # find the growing edge of the script
   	    set depth [::struct::list repeat [expr $level] end]
   	    while {[catch {
   		# get the insertion point
   		set insertion [eval [linsert $depth 0 lindex $script]]
		# 8.5 #	set insertion [lindex $script {*}$depth]
   	    }]} {
   		# handle scripts which jump levels,
   		# reduce depth to current-depth+1
   		set depth [lreplace $depth end end]
   	    }

   	    # add the record at the insertion point
   	    lappend insertion $record

   	    # re-insert the record into its correct position
   	    eval [linsert [linsert $depth 0 lset script] end $insertion]
   	    # 8.5 # lset script {*}$depth $insertion
   	}
    }
    #puts "Script: $script"
    return $script
}

# compile up magic files or directories of magic files into a single recognizer.
proc ::fileutil::magic::cfront::compile {args} {
    set tcl ""
    set script {}
    foreach arg $args {
   	if {[file type $arg] == "directory"} {
   	    foreach file [glob [file join $arg *]] {
   		set script1 [process $file]
		eval [linsert $script1 0 lappend script [list file $file]]
   		# 8.5 # lappend script [list file $file] {*}$script1

   		#append tcl "magic::file_start $file" \n
   		#append tcl [run $script1] \n
   	    }
   	} else {
   	    set file $arg
   	    set script1 [process $file]
   	     eval [linsert $script1 0 lappend script [list file $file]]
   	    # 8.5 # lappend script [list file $file] {*}$script1

   	    #append tcl "magic::file_start $file" \n
   	    #append tcl [run $script1] \n
   	}
    }

    #puts stderr $script
    ::fileutil::magic::cfront::Debug {puts "\# $args"}



    set    t   [2tree $script]
    set    tcl [treegen $t root]















    append tcl "\nreturn \{\}"


    ::fileutil::magic::cfront::Debug {puts [treedump $t]}
    #set tcl [run $script]

    return $tcl
}

proc ::fileutil::magic::cfront::procdef {procname args} {

    set pspace [namespace qualifiers $procname]

    if {$pspace eq ""} {
	return -code error "Cannot generate recognizer in the global namespace"
    }

    set     script {}
    lappend script "package require fileutil::magic::rt"
    lappend script "namespace eval [list ${pspace}] \{"
    lappend script "    namespace import ::fileutil::magic::rt::*"
    lappend script "\}"
    lappend script ""

    lappend script [list proc ${procname} {} \n[eval [linsert $args 0 compile]]\n]

    return [join $script \n]
}

proc ::fileutil::magic::cfront::install {args} {
    foreach arg $args {
	set path [file tail $arg]
	eval [procdef ::fileutil::magic::/${path}::run $arg]
    }
    return
}












# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::cfront::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,







>

|











>
>
|
>

>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
|
>
>
>

>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>


|
<
<
<
<
<
|
|
<
<
|
<
<
|
<
|
|
|
|
<
<
|
|
<
<
<
|
>
|
<
|
|
|

>
>
>
|
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
|
>
>
>
>
|
>
>
>
>
>



|
<
<




>
>
>
>

>
>
>
>
|
>
>
>
>
>
>
>
|


>
>
|
|

|



>
|
|
|
|
<
<
|
<
<
<
<
<
<
<
|
|
<
<
|
|
<
<
|
<
|
<
|
<
>
>
|
|
|
<
|
<
|
|
<
<
|
<
<
|
<
<
|
<
|
<
<
<
|
<
<
|
<
<
<
<
|
|
<
<
|
<
<
|
|
<
<
<
|
|
|
<
<
<
<
<
<
|
<
<
|
<
<
|
<
<
|
<
|
|
|
|
<
|
>
>
>
|
<
<
<
<
<
<
<
<
|
<
<
<
<
|
<
<
<
<
<
|
|
<
<
|
<
<
<
<
<
<
<
<





|

|



|

<
<
|
<
<
|

<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


|
|

|

|
<
<
|
|
|
<


|
<
<
<
|
|






>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>















<
|
|
|
|
|
>
|
>
|









>
>
>
>
>
>
>
>
>
>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156





157
158


159


160

161
162
163
164


165
166



167
168
169

170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640


641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678


679







680
681


682
683


684

685

686

687
688
689
690
691

692

693
694


695


696


697

698



699


700




701
702


703


704
705



706
707
708






709


710


711


712

713
714
715
716

717
718
719
720
721








722




723





724
725


726








727
728
729
730
731
732
733
734
735
736
737
738
739


740


741
742







743





























744
745
746
747
748
749
750
751


752
753
754

755
756
757



758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801

802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
package require Tcl 8.4

# file to compile the magic file from magic(5) into a tcl program
package require fileutil              ; # File processing (input)
package require fileutil::magic::cgen ; # Code generator.
package require fileutil::magic::rt   ; # Runtime (typemap)
package require struct::list          ; # lrepeat.
package require struct::tree          ; #

package provide fileutil::magic::cfront 1.2

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::cfront {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0

    # Make backend functionality accessible
    namespace import ::fileutil::magic::cgen

    namespace export compile procdef install

    variable floattestops {= < > !}
    variable inttestops {= < > & ^ ~ !}
    variable stringtestops { > < = !}
    variable offsetopts {& | ^ + - * / %}
    variable stringmodifiers {W w c C t b T}
    variable typemodifiers [dict create \
	indirect r \
	search $stringmodifiers \
	string $stringmodifiers \
	pstring [list {*}$stringmodifiers B H h L l J] \
	regex {c s l} \
    ]
    set numeric_modifier_allowed {regex search}
	
    variable types_numeric_short { 
	dC byte d1 byte C byte 1 byte ds short d2 short S short 2 short dI long
	dL long d4 long I long L long 4 long  d8 quad 8 quad dQ quad Q quad
    }

    variable types_numeric_re [join [list {*}[
	array names ::fileutil::magic::rt::typemap] {*}[
	dict keys $types_numeric_short]] |]

    variable types_string_short [dict create s string] 

    variable types_string {
	bestring clear default indirect lestring pstring regex search string
    }
    variable types_string_re [join [list {*}[
	dict keys $types_string_short] {*}$types_string] |]

    variable types_verbatim {name use}

    variable types_notimplemented {}
    variable types_notimplemented_re [join $types_notimplemented |]

    variable types_numeric_real {
	float double befloat bedouble lefloat ledouble
    }

    variable indir_typemap [dict create \
	b byte c byte e ledouble f ledouble g ledouble i leid3 h leshort \
	s leshort l lelong B byte C byte E bedouble F bedouble G bedouble \
	H beshort I beid3 L belong m ME S beshort]

}

proc ::fileutil::magic::cfront::advance {len args} {
    upvar node node tree tree
    if {[llength args]} {
	upvar [lindex $args 0] res
    }
    set res {}
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    if {[string index $len 0] eq {w}} {
	regexp -start $cursor {\A(\s*)} $line match res
	incr cursor [string length $match]
	set len [string range $len 1 end]
    }
    if {$len ne {}} {
	if {[regexp -start $cursor "\\A(.{[
	    scan $len %lld]})" $line match res]} {
	    incr cursor [string length $match]
	}
    }
    set line [$tree get $node line]
    $tree set $node cursor $cursor
    return $res
}

proc ::fileutil::magic::cfront::rewind len {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    incr cursor -$len
    $tree set $node cursor $cursor
}

proc ::fileutil::magic::cfront::parseerror args {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    set line [$tree get $node line]
    set files [$tree get root files]
    set file [lindex files [$tree get $node file]]
    return -code error -errorcode [list fumagic {parse error}] [
	list [lmap arg $args {string trim $arg}] \
	file $file \
	linenenum [$tree get $node linenum] \
	cursor $cursor \
	line [list \
	    [string range $line 0 ${cursor}-1] \
	    [string range $line $cursor end]]]
}

proc ::fileutil::magic::cfront::parsewarning args {
    upvar node node tree tree
    catch {parseerror {*}$args} res options
    puts stderr [dict get $options -errorinfo]
}


# parse an individual line
variable ::fileutil::magic::cfront::parsedkeys {





}
proc ::fileutil::magic::cfront::parseline {tree node} {


    variable parsedkeys


    set line [$tree get $node line]

    $tree set $node cursor 0 
    parseoffset $tree $node
    parsetype $tree $node
    parsetest $tree $node


    parsemsg $tree $node




    set record [$tree getall $node]
    foreach key $parsedkeys {
	if {![dict exists $record $key]} {

	    return -code error [list {missing key} $key]
	}
    }
    ::fileutil::magic::cfront::Debug {
   	puts [list parsed $record]
    }
}

proc ::fileutil::magic::cfront::parsefloat {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[scan $line2 %e%n num count] < 0} {
	parseerror {invalid floating point number}
    }
    set cursor [expr {$cursor + $count}]
    $tree set $node cursor $cursor

    # These suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([fFlL)} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}

proc ::fileutil::magic::cfront::parseint {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[set scanres [scan $line2 %lli%n num n]] < 1} {
	parseerror {invalid number}
    }
    set cursor [expr {$cursor + $n}]
    $tree set $node cursor $cursor
    # Thse suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([uU]?[lL]{1,2})} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}


proc ::fileutil::magic::cfront::parsetype {tree node} {
    variable types_numeric_re
    variable types_numeric_short
    variable types_string_re
    variable types_string_short
    variable types_notimplemented_re
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    $tree set $node mod {}
    $tree set $node mand {}
    if {[regexp -start $cursor {\A\s*(\w+)} $line match type]} {
	advance [string length $match]
	switch -regexp -matchvar match $type \
	    ^(u?)($types_numeric_re)$ - ^($types_string_re)$ {

	    set type [lindex $match end]
	    if {[llength $match] == 3} {
		#numeric
		if {[dict exists $types_numeric_short $type]} {
		    set type [dict get $types_numeric_short $type]
		}
		$tree set $node type $type

		# {to do} {Current design doesn't use sign, right?  Is it
		# really not needed?}
		$tree set $node sgn [dict get {{} 1 u 0} [
		    lindex $match 1]]

		parsetypenummod $tree $node
	    } else {
		lassign $match match type
		if {[dict exists $types_string_short $type]} {
		    set type [dict get $types_string_short $type]
		}
		$tree set $node type $type

		# No modifying operator for strings
		parsetypemod $tree $node

		if {$type eq {search} && [$tree get $node mand] eq {}} {
		    parseerror {search has no number}
		}
	    }
	} \
	^(name|use)$ {
	    $tree set $node type [lindex $match end] 
	} \
	$types_notimplemented_re {
	    parseerror {type not implemented}
	} \
	default {
	    parseerror {unknown type}
	}
    } else {
	parseerror {no type}
    }
}

proc ::fileutil::magic::cfront::parsetypemod {tree node} {
    # For numeric types , $mod is a list of modifiers and $mand is either a
    # number or the empty strinng .
    variable typemodifiers
    variable numeric_modifier_allowed
    set type [$tree get $node type]
    if {[advance 1 char] ne {/}} {
	rewind 1
	return
    }
    set res [dict create] 
    while 1 {
	if {[advance 1 char] eq {/}} {
	    continue
	}
	if {[string is space $char]} {
	    break
	}
	if {[dict exists $typemodifiers $type] && $char in [dict get $typemodifiers $type]} {
	    dict set res $char {}
	} elseif {$type in $numeric_modifier_allowed} {
	    rewind 1
	    if {[catch {parseint $tree $node} mand]} {
		# Whatever it is, it isn't a number.  Let the next parsing step
		# handle it .
		break
	    } else {
		$tree set $node mand $mand  ; # numeric modifier
	    }
	} else {
	    parseerror {bad modifier}
	}
    }
    $tree set $node mod [dict keys $res]
}

proc ::fileutil::magic::cfront::parsetypenummod {tree node} {
    # For numeric types, $mod is an operator and $mand is a number
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    if {[regexp -start $cursor {\A([-&|^+*/%])} \
	$line match mod]} {
	advance [string length $match]
	$tree set $node mod $mod
	# {to do} {parse floats?}
	$tree set $node mand [parseint $tree $node] ; # mod operand
    } else {
	$tree set $node mod {}
	$tree set $node mand {} 
    }
}


proc ::fileutil::magic::cfront::parsestringval {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    advance w1 char 
    set val {}
    set line [$tree get $node line]
    while 1 {
	# break on whitespace or empty string
	if {[string is space $char] || $char eq {}} break
	switch $char [dict create  \
	    \\ {
		advance 1 char
		if {[string is space $char]} {
		    append val \\$char
		} else {
		    # extra backslashes because of interaction with glob
		    switch -glob $char [dict create \
			{\\} {
			    append val {\\}
			} \t {
			    parsewarning {use \t instead of \<tab>}
			    append val \\t
			} > - < - & - ^ - = - ! - ( - ) - . {
			    if {$char in [list {*}$stringtestops \
				{*}$floattestops {*}$inttestops]} {
				parsewarning {no need to escape operators}
			    }
			    append val $char 
			} a - b - f - n - r - t - v {
			    append val \\$char
			} x {
			    set cursor [$tree get $node cursor]
			    if {[regexp -start $cursor \
				{\A([0-9A-Fa-f]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val \\x$char2
			    } else {
				parseerror {malformed \x escape sequence}
			    }
			} \[0-7\] {
			    set cursor [$tree get $node cursor]
			    append val \\$char
			    if {[regexp -start $cursor \
				{\A([0-7]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val $char2
			    }
			} default {
			    parseerror {Could not handle escape sequence in value}
			}
		    ]
		}
	    } default {
		if {[string is space $char] || $char in [
		    list \# \{ \} \[  \] \" \$ \; \n]} {
		    append val \\
		}
		append val $char
	    }
	]
	advance 1 char
    }
    $tree set $node val $val
}

proc ::fileutil::magic::cfront::parsetestverbatim {tree node} {
    switch [$tree get $node type] {
	name {
	    $tree set $node rel 1
	}
	use {
	    set cursor [$tree get $node cursor]
	    # order matters in regular expression : longest match must come
	    # first in parenthesized
	    if {[regexp -start $cursor {\A\s*(?:\\\^|\^)} [$tree get $node line] match]} {
		advance [string length $match]
		$tree set $node iendian 1
	    } else {
		$tree set $node iendian 0
	    }
	}

    }
    parsestringval $tree $node
}

proc ::fileutil::magic::cfront::parsetest {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    variable types_numeric_real
    variable types_numeric_short
    variable types_string
    variable types_verbatim
    set type [$tree get $node type]
    if {$type in $types_verbatim} {
	parsetestverbatim $tree $node
	return
    }
    $tree set $node compinvert 0
    set testinvert 0
    set comp ==
    advance w1 char
    if {$char eq {x}} {
	advance 1 char
	if {[string is space $char]} {
	    $tree set $node testinvert 0
	    $tree set $node comp x
	    $tree set $node val {}
	    return
	} else {
	    rewind 1
	}
    }

    if {$type in $types_string} {
	while 1  {
	    if {$char in $stringtestops} {
		if {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	parsestringval $tree $node
    } elseif {$type in [list {*}[
	array names ::fileutil::magic::rt::typemap] {*}[
	dict keys $types_numeric_short]]} {
	if {$type in $types_numeric_real} {
	    set ops $floattestops
	    set parsecmd parsefloat
	} else {
	    set ops $inttestops 
	    set parsecmd parseint
	}

	while 1 {
	    if {$char in $ops} {
		if {$char eq {~}} {
		    $tree set $node compinvert 1 
		} elseif {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation and tilde must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	$tree set $node val [$parsecmd $tree $node]
    } else {
	parseerror {don't know how to parse the test or this type}
    }
    switch $comp {
	= {
	    set comp ==
	}
    }
    # This facilitates Switch creation by [treegen1]
    if {$testinvert && ($comp eq {==})} {
	set comp !=
	set testinvert 0
    }
    $tree set $node testinvert $testinvert
    $tree set $node comp $comp 
}

proc ::fileutil::magic::cfront::parseoffset {tree node} {

    # Offset parser.
    # Syntax:
    #   ( ?&? number ?.[bslBSL]? ?[+-]? ?number? )

    # This was all fine and dandy, but didn't do spaces where spaces might
    # exist between lexical elements in the wild, and ididn't do offset
    # operators

    #set n {([-+]?[0-9]+|[-+]?0x[0-9A-Fa-f]+)[UL]*}

    ##"\\((&?)(${n})((\\.\[bslBSL])?)()(\[+-]?)(${n}?)\\)"
    #set o \
    #    "^(&?)${n}((?:\\.\[bslBSL])?)(?:(\[-+*/%&|^])${n})?(?:(\[+-])(\\()?${n}\\)?)?$"
    ##     |   |   |                     |            |        |      |    |
    ##     1   2   3                     4            5        6      7    8 
    ##                            1    2    3     4   5        6    7     8   
    #set ok [regexp $o $offset -> irel base type  iop ioperand sign ind idx]


    variable offsetopts
    variable indir_typemap
    $tree set $node rel 0 ;   # relative
    $tree set $node ind 0 ;   # indirect
    $tree set $node ir 0 ;    # indirect relative
    $tree set $node it {} ;   # indir_type
    $tree set $node ioi 0 ;   # indirect offset invert
    $tree set $node iir 0 ;   # indirect indirect relative 
    $tree set $node ioo 0 ;   # indirect_offset_op
    $tree set $node io 0 ;    # indirect offset
    advance w1 char
    if {$char eq {&}} {
	advance w1 char
	$tree set $node rel 1
    }

    if {$char eq {(}} {
	$tree set $node ind 1

	if {[advance w1] eq {&}} {
	    $tree set $node ir 1
	} else {
	    rewind 1
	}
	$tree set $node o [parseint $tree $node]

	# $char is used below if it's not "."
	if {[advance w1 char] eq {.}} {
	    advance w1 it
	    if {[dict exists $indir_typemap $it]} {
		set it [
		    dict get $indir_typemap $it]
	    } else {
		parseerror {bad indirect offset type}
	    }
	    advance w1 char
	} else {
	    set it long
	}
	$tree set $node it $it


	# The C implementation does this, so we will , too .
	if {$char eq {~}} {
	    advance w1 char
	    $tree set $node ioi 1
	}

	if {$char in $offsetopts} {
	    $tree set $node ioo $char
	    if {[advance w1] in {(}} {
		$tree set $node iir 1
	    } else {
		rewind 1
	    }
	    $tree set $node io [parseint $tree $node]
	    if {[$tree get $node iir]} {
		if {[advance w1] ne {)}} {
		    parseerror {
			expected closing parenthesis for indirect indirect offset offset
		    }
		}
	    }
	    advance w1 char
	}

	if {$char ne {)}} {
	    parseerror {
		expected close parenthesis for indirect offset 
	    }
	}
    } else {
	rewind 1
	$tree set $node o [parseint $tree $node]
    }
}

proc ::fileutil::magic::cfront::parseoffsetmod {tree node} {
    advance w1 char
    if {$char eq {~}} {
	$tree set $node offset_invert 1
	advance w1 char
    } else {
	$tree set $node offset_invert 0
    }
    switch $char {
	+ - - - * - / - % - & - | - ^ {
	    $tree set $node offset_mod_op $char
	    $tree set $node offset_mod [parseint $tree $node]
	}
	default {
	    $tree set $node offset_mod_op {}
	    $tree set $node offset_mod {}
	    rewind 1
	    # no offset modifier
	}
    }
}

proc ::fileutil::magic::cfront::parsemsg {tree node} {
    advance w
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    ##leave \b in the message for [emit] to parse
    #regexp -start $cursor {\A(\b|\\b)?(.*)$} $line match b line
    #if {$b ne {}} {
    #    $tree set $node space 0
    #} else {
    #    $tree set $node space 1
    #}
    set line [string range $line $cursor end]
    $tree set $node desc $line
}

# process a magic file
proc ::fileutil::magic::cfront::process {tree file {maxlevel 10000}} {


    variable level	;# level of line
    variable linenum	;# line number

    set level  0

    set linenum 0
    set records {}
    set rejected 0
    set script {}
    if {[$tree keyexists root files]} {
	set files [$tree get root files]
    } else {
	set files {}
    }
    set fileidx [llength $files] 
    if {$file in $files} {
	return -code error [list {already processed file} $file]
    }
    lappend files $file
    $tree set root files $files
    $tree set root level -1
    set node root
    ::fileutil::foreachLine line $file {
   	incr linenum
	# Only trim the left side . White space on the the right side could be
	# part of an escape sequence , and trimming would munge it .
   	set line [string trimleft $line]
   	if {[string index $line 0] eq {#}} {
   	    continue	;# skip comments
   	} elseif {$line eq {}} {
   	    continue	;# skip blank lines
   	} else {
   	    # parse line
	    if {[regexp {!:(\S+)\s*(.*)$} $line -> extname extdata]} {
		if {$rejected} {
		    continue
		}
		if {$node eq {root}} {


		    return -code error [list {malformed magic file}]







		}
		$tree set $node ext_$extname $extdata


	    } else {
		# calculate the line's level


		set unlevel [string trimleft $line >]

		set level   [expr {[string length $line] - [string length $unlevel]}]

		set line $unlevel

		if {$level > $maxlevel} {
		    return -code continue "Skip - too high a level"
		}
		if {$level > 0} {
		    if {$rejected} {

			continue

		    }
		    while {[$tree keyexists $node level] && [$tree get $node level] >= $level} {


			set node [$tree parent $node]


		    }


		    if {$level > [$tree get $node level]+1} {

			return -code error [



			    list {level more than one greater than parent level} \


				file $file linenum $linenum line $line]




		    }
		    set node [$tree insert $node end]


		} else {


		    set rejected 0
		    set node [$tree insert root end]



		    set node0 $node
		}
		$tree set $node file $fileidx






		$tree set $node line $line


		$tree set $node linenum $linenum


		$tree set $node level $level


		if {[catch {parseline $tree $node} cres copts]} {

		    set errorcode [dict get $copts -errorcode]
		    if {[lindex $errorcode 0] eq {fumagic} && [
			lindex $errorcode 1] eq {parse error}} {
			$tree delete $node0

			set rejected 1
			puts stderr [list Rejected {bad parse}]
			puts stderr [dict get $copts -errorinfo]
			continue	;# skip erroring lines
		    } else {








			return -options $copts $cres




		    }






		}


	    }








   	}

   	# collect some summaries
   	::fileutil::magic::cfront::Debug {
   	    variable types
   	    set types($type) [$tree get $node type]
   	    variable quals
   	    set quals($qual) [$tree get $node qual]
   	}

   	#puts $linenum level:$level offset:$offset type:$type
	#puts qual:$qual compare:$compare val:'$val' desc:'$desc'



    }


}






































# compile up magic files or directories of magic files into a single recognizer.
proc ::fileutil::magic::cfront::compile {args} {
    set tree [tree]

    foreach arg $args {
   	if {[file type $arg] eq  {directory}} {
   	    foreach file [glob [file join $arg *]] {
   		process $tree $file


   	    }
	    #append tcl "magic::file_start $file" \n
	    #append tcl [run $script1] \n

   	} else {
   	    set file $arg
   	    process $tree $file



	    #append tcl "magic::file_start $file" \n
	    #append tcl [run $script1] \n
   	}
    }

    #puts stderr $script
    ::fileutil::magic::cfront::Debug {puts "\# $args"}

    # Historically, this command converted the output of [process] , which was
    # a list , into a tree . Now it post-processes the tree .
    cgen 2tree $tree

    set tests [cgen treegen $tree root]
    set named [$tree get root named]
    set tcl [string map [list @named@ [list $named] @tests@ [list $tests]] {
	yield [info coroutine]
	set named @named@
	foreach test @tests@ {
	    set level 0
	    set ext {}
	    set mime {}
	    try $test
	    lassign [resultv] found weight result
	    if {$found}  {
		yield [list $weight $result $mime [split $ext /]]
	    }
	}
	return
    }]

    ::fileutil::magic::cfront::Debug {puts [treedump $t]}
    #set tcl [run $script]

    return $tcl
}

proc ::fileutil::magic::cfront::procdef {procname args} {

    set pspace [namespace qualifiers $procname]

    if {$pspace eq ""} {
	return -code error "Cannot generate recognizer in the global namespace"
    }


    append script "package require fileutil::magic::rt"
    append script "\nnamespace eval [list $pspace] \{"
    append script "    namespace import ::fileutil::magic::rt::*"
    append script "\}"
    append script \n 
    append script "proc [list $procname] {} {
	[eval [linsert $args 0 compile]]\n
    }"
    return $script 
}

proc ::fileutil::magic::cfront::install {args} {
    foreach arg $args {
	set path [file tail $arg]
	eval [procdef ::fileutil::magic::/${path}::run $arg]
    }
    return
}

proc ::fileutil::magic::cfront::tree {} {
    set tree [::struct::tree]

    $tree set root path ""
    $tree set root otype Root
    $tree set root type root
    $tree set root named {}
    $tree set root message "unknown"
    return $tree
}

# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::cfront::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,

Changes to modules/fumagic/cgen.tcl.

1
2
3
4
5

6
7
8
9
10
11
12
# cgen.tcl --
#
#	Generator core for compiler of magic(5) files into recognizers
#	based on the 'rtcore'.
#

# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: cgen.tcl,v 1.7 2007/06/23 03:39:34 andreas_kupries Exp $





>







1
2
3
4
5
6
7
8
9
10
11
12
13
# cgen.tcl --
#
#	Generator core for compiler of magic(5) files into recognizers
#	based on the 'rtcore'.
#
# Copyright (c) 2016      Poor Yorick     <[email protected]>
# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: cgen.tcl,v 1.7 2007/06/23 03:39:34 andreas_kupries Exp $
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36



37


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55




56
57
58
59
60
61
62
63
64
65
# Wiki page last updated: ???
#
#####

# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.4
package require fileutil::magic::rt ; # Runtime core, for Access to the typemap
package require struct::list        ; # Our data structures.
package require struct::tree        ; #

package provide fileutil::magic::cgen 1.0

# ### ### ### ######### ######### #########
## Implementation




namespace eval ::fileutil::magic::cgen {


    # Import the runtime typemap into our scope.
    variable ::fileutil::magic::rt::typemap

    # The tree most operations use for their work.
    variable tree {}

    # Generator data structure.
    variable regions

    # Type mapping for indirect offsets.
    # empty -> long/Q, because this uses native byteorder.

    array set otmap {
        .b c    .B c
        .s s    .S S
        .l i    .L I
	{} Q
    }





    # Export the API
    namespace export 2tree treedump treegen
}


# Optimisations:

# reorder tests according to expected or observed frequency this
# conflicts with reduction in strength optimisations.







|


<

|




>
>
>

>
>









|
<
|
<
<
<
<
<
|
>
>
>
>
|
<
<







21
22
23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53





54
55
56
57
58
59


60
61
62
63
64
65
66
# Wiki page last updated: ???
#
#####

# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.6
package require fileutil::magic::rt ; # Runtime core, for Access to the typemap
package require struct::list        ; # Our data structures.


package provide fileutil::magic::cgen 1.2

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic {
    namespace export *
}
namespace eval ::fileutil::magic::cgen {
    namespace ensemble create
    namespace export *
    # Import the runtime typemap into our scope.
    variable ::fileutil::magic::rt::typemap

    # The tree most operations use for their work.
    variable tree {}

    # Generator data structure.
    variable regions

    # Export the API

    namespace export 2tree treedump treegen






   # Assumption : the parser folds the test inversion operator into equality and
   # inequality operators .
    variable offsetskey {
	type o rel ind ir it ioi ioo iir io compinvert mod mand
    }


}


# Optimisations:

# reorder tests according to expected or observed frequency this
# conflicts with reduction in strength optimisations.
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111












112
113
114
115
116
117
118
119
120
121
122
123
124
125

126
127

128
129


130
131
132


133

134
135
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# - String tests at same level over overlapping ranges can be
#   written as sub-string comparisons over the maximum range
#   this saves re-reading the same string from file.
#
# - common prefix strings will have to be guarded against, by
#   sorting string values, then sorting the tests in reverse length order.


proc ::fileutil::magic::cgen::path {tree} {
    # Annotates the tree. In each node we store the path from the root
    # to this node, as list of nodes, with the current node the last
    # element. The root node is never stored in the path.

    $tree set root path {}
    foreach child [$tree children root] {
   	$tree walk $child -type dfs node {
   	    set path [$tree get [$tree parent $node] path]
   	    lappend path [$tree index $node]
   	    $tree set $node path $path
   	}
    }
    return
}

proc ::fileutil::magic::cgen::tree_el {tree parent file line type qual comp offset val message args} {













    # Recursively creates and annotates a node for the specified
    # tests, and its sub-tests (args).

    set     node [$tree insert $parent end]
    set     path [$tree get    $parent path]
    lappend path [$tree index  $node]
    $tree set $node path $path

    # generate a proc call type for the type, Numeric or String
    variable ::fileutil::magic::rt::typemap

    switch -glob -- $type {
   	*byte* -

   	*short* -
   	*long* -

   	*date* {
   	    set otype N


   	    set type [lindex $typemap($type) 1]
   	}
   	*string {


   	    set otype S

   	}
   	default {
   	    puts stderr "Unknown type: '$type'"

   	}
    }

    # Stores the type determined above, and the arguments into
    # attributes of the new node.

    foreach key {line type qual comp offset val message file otype} {
   	if {[catch {
   	    $tree set $node $key [set $key]
   	} result]} {
	    upvar ::errorInfo eo
   	    puts "Tree: $eo - $file $line $type"
   	}
    }

    # now add children
    foreach el $args {
	eval [linsert $el 0 tree_el $tree $node $file]
   	# 8.5 # tree_el $tree $node $file {*}$el
    }
    return $node
}

proc ::fileutil::magic::cgen::2tree {script} {

    # Converts a recognizer which is in a simple script form into a
    # tree.

    variable tree
    set tree [::struct::tree]

    $tree set root path ""
    $tree set root otype Root
    $tree set root type root
    $tree set root message "unknown"

    # generate a test for each match
    set file "unknown"
    foreach el $script {
   	#puts "EL: $el"
   	if {[lindex $el 0] eq "file"} {
   	    set file [lindex $el 1]
   	} else {
	    set node [eval [linsert $el 0 tree_el $tree root $file]]
	    # 8.5 # set more [tree_el $tree root $file {*}$el]
   	    append result $node
   	}
    }
    optNum $tree root
    #optStr $tree root
    puts stderr "Script contains [llength [$tree children root]] discriminators"
    path $tree

    # Decoding the offsets, determination if we have to handle







<
















|
>
>
>
>
>
>
>
>
>
>
>
>




<
<
<
<






>


>

|
>
>
|

|
>
>
|
>
|


>






<
<
<
<
<
<
<
<
<

|
<
|

|


|

<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<







88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127




128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158









159
160

161
162
163
164
165
166
167


168
















169


170
171
172
173
174
175
176
# - String tests at same level over overlapping ranges can be
#   written as sub-string comparisons over the maximum range
#   this saves re-reading the same string from file.
#
# - common prefix strings will have to be guarded against, by
#   sorting string values, then sorting the tests in reverse length order.


proc ::fileutil::magic::cgen::path {tree} {
    # Annotates the tree. In each node we store the path from the root
    # to this node, as list of nodes, with the current node the last
    # element. The root node is never stored in the path.

    $tree set root path {}
    foreach child [$tree children root] {
   	$tree walk $child -type dfs node {
   	    set path [$tree get [$tree parent $node] path]
   	    lappend path [$tree index $node]
   	    $tree set $node path $path
   	}
    }
    return
}

proc ::fileutil::magic::cgen::tree_el {tree node} {
    set parent [$tree parent $node]
    if {[$tree keyexists $parent path]} {
	set path [$tree get $parent path]
    } else {
	set path {} 
    }
    lappend path [$tree index $node]
    $tree set $node path $path

    foreach name {type} {
	set $name [$tree get $node $name]
    }

    # Recursively creates and annotates a node for the specified
    # tests, and its sub-tests (args).






    # generate a proc call type for the type, Numeric or String
    variable ::fileutil::magic::rt::typemap

    switch -glob -- $type {
   	*byte* -
	*double* -
   	*short* -
   	*long* -
	*quad* -
   	*date* {
   	    $tree set $node otype N
   	}
   	clear - default - search - regex - *string* {
   	    $tree set $node otype S
   	}
	name {
	    $tree set $node otype A
	}
	use {
	    $tree set $node otype U
	}
   	default {
   	    puts stderr "Unknown type: '$type'"
	    $tree set $node otype Unknown
   	}
    }

    # Stores the type determined above, and the arguments into
    # attributes of the new node.










    # now add children
    foreach el [$tree children $node] {

	tree_el $tree $el
    }
    return
}

proc ::fileutil::magic::cgen::2tree {tree} {



    foreach child [$tree children root] {
















	tree_el $tree $child


    }
    optNum $tree root
    #optStr $tree root
    puts stderr "Script contains [llength [$tree children root]] discriminators"
    path $tree

    # Decoding the offsets, determination if we have to handle
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289




290
291
292
293
294
295
296
297

    foreach el $strings {
   	#if {[$tree get $el otype] eq "String"} {puts "[$tree getall $el] - [string length [$tree get $el val]]"}
	if {[$tree get $el comp] eq "x"} {
	    continue
	}

	set offset [$tree get $el offset]
	set len    [string length [$tree get $el val]]
	lappend regions([list $offset $len]) $el
    }
}

proc ::fileutil::magic::cgen::isNum {tree node} {
    return [expr {"N" eq [$tree get $node otype]}]
}

proc ::fileutil::magic::cgen::switchNSort {tree n1 n2} {
    return [expr {[$tree get $n1 val] - [$tree get $n1 val]}]
}

proc ::fileutil::magic::cgen::optNum {tree node} {

    array set offsets {}

    # traverse each numeric element of this node's children,
    # categorising them

    set kids [$tree children $node]
    foreach child $kids {
	optNum $tree $child
    }

    set numerics [$tree children $node filter ::fileutil::magic::cgen::isNum]
    #puts stderr "optNum: $node: $numerics"
    if {[llength $numerics] < 2} {
	return
    }

    foreach el $numerics {
	if {[$tree get $el comp] ne "=="} {
	    continue
	}




	lappend offsets([$tree get $el type],[$tree get $el offset],[$tree get $el qual]) $el
    }

    #puts "Offset: stderr [array get offsets]"
    foreach {match nodes} [array get offsets] {
	if {[llength $nodes] < 2} {
	    continue
	}







|

|












>

















|


>
>
>
>
|







234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288

    foreach el $strings {
   	#if {[$tree get $el otype] eq "String"} {puts "[$tree getall $el] - [string length [$tree get $el val]]"}
	if {[$tree get $el comp] eq "x"} {
	    continue
	}

	set o [$tree get $el o]
	set len    [string length [$tree get $el val]]
	lappend regions([list $o $len]) $el
    }
}

proc ::fileutil::magic::cgen::isNum {tree node} {
    return [expr {"N" eq [$tree get $node otype]}]
}

proc ::fileutil::magic::cgen::switchNSort {tree n1 n2} {
    return [expr {[$tree get $n1 val] - [$tree get $n1 val]}]
}

proc ::fileutil::magic::cgen::optNum {tree node} {
    variable offsetskey
    array set offsets {}

    # traverse each numeric element of this node's children,
    # categorising them

    set kids [$tree children $node]
    foreach child $kids {
	optNum $tree $child
    }

    set numerics [$tree children $node filter ::fileutil::magic::cgen::isNum]
    #puts stderr "optNum: $node: $numerics"
    if {[llength $numerics] < 2} {
	return
    }

    foreach el $numerics {
	if {[$tree get $el comp] ne {==}} {
	    continue
	}
	set key {}
	foreach name $offsetskey {
	    lappend key [$tree get $el $name]
	}
	lappend offsets([join $key ,]) $el
    }

    #puts "Offset: stderr [array get offsets]"
    foreach {match nodes} [array get offsets] {
	if {[llength $nodes] < 2} {
	    continue
	}
305
306
307
308
309
310
311
312
313
314
315
316
317
318

319
320
321
322
323
324
325
326



327
328
329
330
331
332
333
334
335
336
337
338



339

340

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431

432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
		puts stderr "* clashes with <[$tree getall $matcher($nv)]>"
		puts stderr "*====================================="
	    } else {
		set matcher($nv) $n
	    }
	}

	foreach {type offset qual} [split $match ,] break
	set switch [$tree insert $node [$tree index [lindex $nodes 0]]]
	$tree set $switch otype   Switch
	$tree set $switch message $match
	$tree set $switch offset  $offset
	$tree set $switch type    $type
	$tree set $switch qual    $qual


	set nodes [lsort -command [list ::fileutil::magic::cgen::switchNSort $tree] $nodes]

	eval [linsert $nodes 0 $tree move $switch end]
	# 8.5 # $tree move $switch end {*}$nodes
	set     path [$tree get [$tree parent $switch] path]
	lappend path [$tree index $switch]
	$tree set $switch path $path



    }
}

proc ::fileutil::magic::cgen::Offsets {tree} {

    # Indicator if a node has to save field location information for
    # relative addressing. The 'kill' attribute is an accumulated
    # 'save' over the whole subtree. It will be used to determine when
    # level information was destroyed by subnodes and has to be
    # regenerated at the current level.

    $tree walk root -type dfs node {



	$tree set $node save 0

	$tree set $node kill 0

    }

    # We walk from the leafs up to the root, synthesizing the data
    # needed, as we go.
    $tree walk root -type dfs -order post node {
	if {$node eq "root"} continue
	DecodeOffset $tree $node [$tree get $node offset]

	# If the current node's parent is a switch, and the node has
	# to save, then the switch has to save. Because the current
	# node is not relevant during code generation anymore, the
	# switch is.

	if {[$tree get $node save]} {
	    # We save, therefore we kill.
	    $tree set $node kill 1
	    if {[$tree get [$tree parent $node] otype] eq "Switch"} {
		$tree set [$tree parent $node] save 1
	    }
	} else {
	    # We don't save i.e. kill, but we may inherit it from
	    # children which kill.

	    foreach c [$tree children $node] {
		if {[$tree get $c kill]} {
		    $tree set $node kill 1
		    break
		}
	    }
	}
    }
}

proc ::fileutil::magic::cgen::DecodeOffset {tree node offset} {
    if {[string match "(*)" $offset]} {
	# Indirection offset. (Decoding is non-trivial, therefore
	# packed into a proc).

	set ind 1 ; # Indirect location
	foreach {rel base itype idelta} [DecodeIndirectOffset $offset] break

    } elseif {[string match "&*" $offset]} {
	# Direct relative offset. (Decoding is trivial)

	set ind    0       ; # Direct location
	set rel    1       ; # Relative
	set base   [string range $offset 1 end] ; # Base Delta
	set itype  {}      ; # No data for indirect
	set idelta {}      ; # s.a.

    } else {
	set ind    0       ; # Direct location
	set rel    0       ; # Absolute
	set base   $offset ; # Here!
	set itype  {}      ; # No data for indirect
	set idelta {}      ; # s.a.
    }

    # Store the expanded data back into the tree.

    foreach v {ind rel base itype idelta} {
	$tree set $node $v [set $v]
    }

    # For nodes with adressing relative to last field above the latter
    # has to save this information.

    if {$rel} {
	$tree set [$tree parent $node] save 1
    }
    return
}

proc ::fileutil::magic::cgen::DecodeIndirectOffset {offset} {
    variable otmap ; # Offset typemap.


    # Offset parser.
    # Syntax:
    #   ( ?&? number ?.[bslBSL]? ?[+-]? ?number? )

    set n {(([0-9]+)|(0x[0-9A-Fa-f]+))}
    set o "\\((&?)(${n})((\\.\[bslBSL])?)(\[+-]?)(${n}?)\\)"
    #         |   | ||| ||               |       | |||
    #         1   2 345 67               8       9 012
    #         ^   ^     ^                ^       ^
    #         rel base  type             sign    index
    #
    #                            1   2    3 4 5 6    7 8    9   0 1 2
    set ok [regexp $o $offset -> rel base _ _ _ type _ sign idx _ _ _]

    if {!$ok} {

        return -code error "Bad offset \"$offset\""
    }

    # rel is in {"", &}, map to 0|1
    if {$rel eq ""} {set rel 0} else {set rel 1}

    # base is a number, enforce decimal. Not optional.
    set base [expr $base]

    # Type is in .b .s .l .B .S .L, and "". Map to a regular magic
    # type code.
    set type $otmap($type)

    # sign is in {+,-,""}. Map to -|"" (Becomes sign of index)
    if {$sign eq "+"} {set sign ""}

    # Index is optional number. Enforce decimal, empty is zero. Add in
    # the sign as well for a proper signed index.

    if {$idx eq ""} {set idx 0}
    set idx $sign[expr $idx]

    return [list $rel $base $type $idx]
}

proc ::fileutil::magic::cgen::treedump {tree} {
    set result ""
    $tree walk root -type dfs node {
	set path  [$tree get $node path]
	set depth [llength $path]

	append result [string repeat "  " $depth] [list $path] ": " [$tree get $node type]:

	if {[$tree keyexists $node offset]} {
	    append result " ,O|[$tree get $node offset]|"

	    set x {}
	    foreach v {ind rel base itype idelta} {lappend x [$tree get $node $v]}
	    append result "=<[join $x !]>"
	}
	if {[$tree keyexists $node qual]} {
	    set q [$tree get $node qual]
	    if {$q ne ""} {
		append result " ,q/$q/"
	    }







|


|
|
|
<
>








>
>
>












>
>
>
|
>
|
>





|
<









|
















<
<
<
<

<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
>
|
<
<
|
<
<
<
<
<
<
<
<
<
|
|
>
|
<
|
<
<
|
<
|
|
<
<
|
|
<
|
|
<
<
|
<
<
|
|










|
|


|







296
297
298
299
300
301
302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371




372


373
































374
375
376
377
378


379









380
381
382
383

384


385

386
387


388
389

390
391


392


393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
		puts stderr "* clashes with <[$tree getall $matcher($nv)]>"
		puts stderr "*====================================="
	    } else {
		set matcher($nv) $n
	    }
	}

	foreach $offsetskey [split $match ,] break
	set switch [$tree insert $node [$tree index [lindex $nodes 0]]]
	$tree set $switch otype   Switch
	$tree set $switch desc $match
	foreach name $offsetskey {
	    $tree set $switch $name [set $name]

	}

	set nodes [lsort -command [list ::fileutil::magic::cgen::switchNSort $tree] $nodes]

	eval [linsert $nodes 0 $tree move $switch end]
	# 8.5 # $tree move $switch end {*}$nodes
	set     path [$tree get [$tree parent $switch] path]
	lappend path [$tree index $switch]
	$tree set $switch path $path

	set level [$tree get [$tree parent $switch] level]
	$tree set $switch level [expr {$level+1}]
    }
}

proc ::fileutil::magic::cgen::Offsets {tree} {

    # Indicator if a node has to save field location information for
    # relative addressing. The 'kill' attribute is an accumulated
    # 'save' over the whole subtree. It will be used to determine when
    # level information was destroyed by subnodes and has to be
    # regenerated at the current level.

    $tree walk root -type dfs node {
	$tree set $node kill 0
	if {[$tree get $node otype] ne {Root} &&
	    ([$tree get $node rel] || [$tree get $node ir])} {
	    $tree set $node save 1
	} else {
	    $tree set $node save 0
	}
    }

    # We walk from the leafs up to the root, synthesizing the data
    # needed, as we go.
    $tree walk root -type dfs -order post node {
	if {$node eq {root}} continue


	# If the current node's parent is a switch, and the node has
	# to save, then the switch has to save. Because the current
	# node is not relevant during code generation anymore, the
	# switch is.

	if {[$tree get $node save]} {
	    # We save, therefore we kill.
	    $tree set $node kill 1
	    if {[$tree get [$tree parent $node] otype] eq {Switch}} {
		$tree set [$tree parent $node] save 1
	    }
	} else {
	    # We don't save i.e. kill, but we may inherit it from
	    # children which kill.

	    foreach c [$tree children $node] {
		if {[$tree get $c kill]} {
		    $tree set $node kill 1
		    break
		}
	    }
	}
    }
}








# Useful when debugging
































proc ::fileutil::magic::cgen::stack {tree node} {
    set res {}
    set files [$tree get root files]
    while 1 {
	set s [dict create \


	    file [lindex $files [$tree get $node file]] \









	    linenum [$tree get $node linenum]]
	if {[$tree keyexists $node origin]} {
	    set origin [$tree get $node origin]
	    dict set s origin [dict create \

		name [$tree get $origin val] \


		file [lindex $files [$tree get $origin file]] \

		linenum [$tree get $origin linenum]]
	}


	set res [linsert $res 0 $s]
	set node [$tree parent $node]

	if {$node eq {root}} {
	    break


	}


    }
    return $res
}

proc ::fileutil::magic::cgen::treedump {tree} {
    set result ""
    $tree walk root -type dfs node {
	set path  [$tree get $node path]
	set depth [llength $path]

	append result [string repeat "  " $depth] [list $path] ": " [$tree get $node type]:

	if {[$tree keyexists $node o]} {
	    append result " ,O|[$tree get $node o]|"

	    set x {}
	    foreach v {ind rel base itype iop ioperand idelta} {lappend x [$tree get $node $v]}
	    append result "=<[join $x !]>"
	}
	if {[$tree keyexists $node qual]} {
	    set q [$tree get $node qual]
	    if {$q ne ""} {
		append result " ,q/$q/"
	    }
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521

522

523

524
525





526

527

528




529




530
531














532
533
534
535
536
537
538
539
540
541
542
543





544

545
546
547
548
549
550
551
552
553


554

555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586








587
588
589
590
591



592
593
594




595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612

613
614
615
616
617
618
619


620
621
622
623
624
625
626
627
628

629

630
631
632
633
634
635
636
637
638

639
640
641
642


643
644
645
646
647
648
649
650
651
652
653
654
655
656
657



658


659
660
661
662
663








664


665
666
667
668
669
670
671
	}

	if {[$tree keyexists $node otype]} {
	    append result " " [$tree get $node otype]/[$tree get $node save]
	}

	if {$depth == 1} {
	    set msg [$tree get $node message]
	    set n $node
	    while {($n != {}) && ($msg == "")} {
		set n [lindex [$tree children $n] 0]
		if {$n != {}} {
		    set msg [$tree get $n message]
		}
	    }
	    append result " " ( $msg )
	    if {[$tree keyexists $node file]} {
		append result " - " [$tree get $node file]
	    }
	}

	#append result " <" [$tree getall $node] >
	append result \n
    }
    return $result
}

proc ::fileutil::magic::cgen::treegen {tree node} {
    return "[treegen1 $tree $node]\nresult\n"
}

proc ::fileutil::magic::cgen::treegen1 {tree node} {
    variable ::fileutil::magic::rt::typemap

    set result ""
    foreach k {otype type offset comp val qual message save path} {
	if {[$tree keyexists $node $k]} {
	    set $k [$tree get $node $k]

	}

    }


    set level [llength $path]







    # Generate code for each node per its type.






    switch $otype {




	N -
	S {














	    if {$save} {
		# We have to save field data for relative adressing under this
		# leaf.
		if {$otype eq "N"} {
		    set type [list Nx $level $type]
		} elseif {$otype eq "S"} {
		    set type [list Sx $level]
		}
	    } else {
		# Regular fetching of information.
		if {$otype eq "N"} {
		    set type [list N $type]





		} elseif {$otype eq "S"} {

		    set type S
		}
	    }

	    set offset [GenerateOffset $tree $node]

	    if {$qual eq ""} {
		append result "if \{\[$type $offset $comp [list $val]\]\} \{"
	    } else {


		append result "if \{\[$type $offset $comp [list $val] $qual\]\} \{"

	    }

	    if {[$tree isleaf $node]} {
		if {$message ne ""} {
		    append result "emit [list $message]"
		} else {
		    append result "emit [$tree get $node path]"
		}
	    } else {
		# If we saved data the child branches may destroy
		# level information. We regenerate it if needed.

		if {$message ne ""} {
		    append result "emit [list $message]\n"
		}

		set killed 0
		foreach child [$tree children $node] {
		    if {$save && $killed && [$tree get $child rel]} {
			# This location already does not regenerate if
			# the killing subnode was last. We also do not
			# need to regenerate if the current subnode
			# does not use relative adressing.
			append result "L $level;"
			set killed 0
		    }
		    append result [treegen1 $tree $child]
		    set killed [expr {$killed || [$tree get $child kill]}]
		}
		#append result "\nreturn \$result"
	    }









	    append result "\}\n"
	}
	Root {
	    foreach child [$tree children $node] {
		append result [treegen1 $tree $child]



	    }
	}
	Switch {




	    set offset [GenerateOffset $tree $node]

	    if {$save} {
		set fetch "Nvx $level"
	    } else {
		set fetch Nv
	    }

	    append fetch " " $type " " $offset
	    if {$qual ne ""} {
		append fetch " " $qual
	    }
	    append result "switch -- \[$fetch\] "

	    set scan [lindex $typemap($type) 1]

	    set ckilled 0
	    foreach child [$tree children $node] {

		binary scan [binary format $scan [$tree get $child val]] $scan val
		append result "$val \{"

		if {$save && $ckilled} {
		    # This location already does not regenerate if
		    # the killing subnode was last. We also do not
		    # need to regenerate if the current subnode


		    # does not use relative adressing.
		    append result "L $level;"
		    set ckilled 0
		}

		if {[$tree isleaf $child]} {
		    append result "emit [list [$tree get $child message]]"
		} else {
		    set killed 0

		    append result "emit [list [$tree get $child message]]\n"

		    foreach grandchild [$tree children $child] {
			if {$save && $killed && [$tree get $grandchild rel]} {
			    # This location already does not regenerate if
			    # the killing subnode was last. We also do not
			    # need to regenerate if the current subnode
			    # does not use relative adressing.
			    append result "L $level;"
			    set killed 0
			}

			append result [treegen1 $tree $grandchild]
			set killed [expr {$killed || [$tree get $grandchild kill]}]
		    }
		}



		set ckilled [expr {$ckilled || [$tree get $child kill]}]
		append result "\} "
	    }
	    append result "\n"
	}
    }
    return $result
}

proc ::fileutil::magic::cgen::GenerateOffset {tree node} {
    # Examples:
    # direct absolute:     45      -> 45
    # direct relative:    &45      -> [R 45]
    # indirect absolute:  (45.s+1) -> [I 45 s 1]



    # indirect relative: (&45.s+1) -> [I [R 45] s 1]



    foreach v {ind rel base itype idelta} {
	set $v [$tree get $node $v]
    }









    if {$rel} {set base "\[R $base\]"}


    if {$ind} {set base "\[I $base $itype $idelta\]"}
    return $base
}

# ### ### ### ######### ######### #########
## Ready for use.
# EOF







|




|















<
<
<
<


|
<
|
|
>
|
>
|
>

|
>
>
>
>
>
|
>
|
>
|
>
>
>
>
|
>
>
>
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
<
|
<
<
<
|
|
<

>
>
>
>
>
|
>
|
<
<
|
<
|
|
|
|
>
>
|
>


|
<
|
|
<
<
<
<
<
|
<
|

<
<

<
<
<
<
<
<
<
<
|
<




>
>
>
>
>
>
>
>
|



|
>
>
>



>
>
>
>
|

|
|




<
<
|
<
|





>
|
|
|
|
<
|
|
>
>
|
|
<
|
|
|
|

<
>
|
>

<
<
<
<
<
|
<
|
>
|
|
|
|
>
>
|
|
|

|









|
>
>
>
|
>
>

|



>
>
>
>
>
>
>
>
|
>
>
|
|





424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451




452
453
454

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501

502



503
504

505
506
507
508
509
510
511
512
513


514

515
516
517
518
519
520
521
522
523
524
525

526
527





528

529
530


531








532

533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567


568

569
570
571
572
573
574
575
576
577
578
579

580
581
582
583
584
585

586
587
588
589
590

591
592
593
594





595

596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
	}

	if {[$tree keyexists $node otype]} {
	    append result " " [$tree get $node otype]/[$tree get $node save]
	}

	if {$depth == 1} {
	    set msg [$tree get $node desc]
	    set n $node
	    while {($n != {}) && ($msg == "")} {
		set n [lindex [$tree children $n] 0]
		if {$n != {}} {
		    set msg [$tree get $n desc]
		}
	    }
	    append result " " ( $msg )
	    if {[$tree keyexists $node file]} {
		append result " - " [$tree get $node file]
	    }
	}

	#append result " <" [$tree getall $node] >
	append result \n
    }
    return $result
}

proc ::fileutil::magic::cgen::treegen {tree node} {




    variable ::fileutil::magic::rt::typemap

    set result {} 

    set named [$tree get root named]
    set otype [$tree get $node otype]
    set level [$tree get $node level]

    set indent \n[string repeat \t [expr {$level > 0 ? $level-1 : 0}]]

    # Generate code for each node per its type.

    switch $otype {
	A {
	    set file [$tree get $node file]
	    set val [$tree get $node val]
	    if {[dict exists named $file$val]} {
		return -code error [list {name already exists} $file $val]
	    }
	    set aresult {}
	    foreach child [$tree children $node] {
		lappend aresult [treegen $tree $child]
	    }
	    dict set named $file $val [join $aresult \n]
	    $tree set root named $named
	    return
	}
	U {
	    set file [$tree get $node file]
	    set val [$tree get $node val]
	    append result "U [list $file] [list $val]\n" 
	}
	N -
	S {
	    set names {type mod mand testinvert compinvert comp val desc kill save path}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }

	    set o [GenerateOffset $tree $node]

	    if {$val eq {}} {
		# If the value is the empty string, armor it.  Otherwise, it's
		# already been armored.
		set val [list $val]
	    }

	    if {$otype eq {N}} {
		if {$kill} {
		    # We have to save field data for relative adressing under this
		    # leaf.

		    set type [list Nx $type]



		} else {
		    # Regular fetching of information.

		    set type [list N $type]
		}
		# $type and $o are expanded via substitution 
		append result "${indent}if \{\[$type $o [list $testinvert] [
		    list $compinvert] [list $mod] [list $mand] [
		    list $comp] $val\]\} \{>\n"
	    } elseif {$otype eq {S}} {
		switch $comp {
		    == {set comp eq}


		    != {set comp ne}

		}
		if {$kill} {
		    set type [list Sx $type]
		} else {
		    set type [list S $type]
		}
		append result "${indent}if \{\[$type $o [list $testinvert] [
		    list $mod] [list $mand] [list $comp] $val\]\} \{>\n"
	    }

	    if {[$tree isleaf $node] && $desc ne {}} {

		append result "${indent}emit [list $desc]"
	    } else {





		if {$desc ne {}} {

		    append result "${indent}emit [list $desc]\n"
		}


		foreach child [$tree children $node] {








		    append result [treegen $tree $child]

		}
		#append result "\nreturn \$result"
	    }

	    if {[$tree keyexists $node ext_mime]} {
		append result "${indent}mime [$tree get $node ext_mime]\n"
	    }

	    if {[$tree keyexists $node ext_ext]} {
		append result "${indent}ext [$tree get $node ext_ext]\n"
	    }

	    append result "\n<\}\n"
	}
	Root {
	    foreach child [$tree children $node] {
		lappend result [treegen $tree $child]
		if {[lindex $result end] eq {}} {
		    set result [lreplace $result[set result {}] end end]
		}
	    }
	}
	Switch {
	    set names {o type compinvert mod mand kill save}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }
	    set o [GenerateOffset $tree $node]

	    if {$kill} {
		set fetch Nvx
	    } else {
		set fetch Nv
	    }



	    append fetch " $type $o [list $compinvert] [list $mod] [list $mand]"

	    append result "${indent}switch -- \[$fetch\] "

	    set scan [lindex $typemap($type) 1]

	    set ckilled 0
	    foreach child [$tree children $node] {
		# See ::fileutil::magic::rt::rtscan
		if {$scan eq {me}} {
		    set scan I
		}


		# get value in binary form, then back to numeric
		# this avoids problems with sign, as both values are
		# [binary scan]-converted identically
		binary scan [binary format $scan [$tree get $child val]] $scan val

		append result "$val \{>;"


		set desc [$tree get $child desc]
		if {[$tree isleaf $child] && $desc ne {}} {
		    append result "emit [list [$tree get $child desc]]"
		} else {

		    if {$desc ne {}} {
			append result "emit [list [$tree get $child desc]]\n"
		    }
		    foreach grandchild [$tree children $child] {





			append result [treegen $tree $grandchild]

		    }
		}
		if {[$tree keyexists $child ext_mime]} {
		    append result "${indent}mime [$tree get $child ext_mime]\n"
		}

		if {[$tree keyexists $child ext_ext]} {
		    append result "${indent}ext [$tree get $child ext_ext]\n"
		}

		append result ";<\} "
	    }
	    append result "\n<\n"
	}
    }
    return $result
}

proc ::fileutil::magic::cgen::GenerateOffset {tree node} {
    # Examples:
    # direct absolute:     45      -> 45
    # direct relative:    &45      -> [R 45]
    # indirect absolute:  (45.s+1) -> [I 45 s + 0 1]
    # indirect absolute (indirect offset):  (45.s+(1)) -> [I 45 s + 1 1]
    # relative indirect absolute:  &(45.s+1) -> [R [I 45 s + 0 1]]
    # relative indirect absolute (indirect offset):  &(45.s+(1)) -> [R [I 45 s + 1 1]]
    # indirect relative: (&45.s+1) -> [I [R 45] s op 0 1]
    # relative indirect relative: &(&45.s+1) -> [R [I [R 45] s + 0 1]]
    # relative indirect relative: &(&45.s+(1)) -> [R [I [R 45] s + 1 1]]

    foreach v {o rel ind ir it ioi iir ioo io} {
	set $v [$tree get $node $v]
    }

    #foreach v {ind rel base itype iop ioperand iindir idelta} {
    #    set $v [$tree get $node $v]
    #}

    if {$ind} {
	if {$ir} {set o "\[R $o]"}
	set o "\[I $o [list $it] [list $ioi] [list $ioo] [list $iir] [list $io]\]"
    }
    if {$rel} {
	set o "\[R $o\]"
    }
    
    return $o
}

# ### ### ### ######### ######### #########
## Ready for use.
# EOF

Changes to modules/fumagic/filetypes.man.

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

The core part of the recognizer was generated from a "magic(5)" file
containing the checks to perform to recognize files, and associated
file-types.

[para]

[emph Beware!] This recognizer is large, about 276 Kilobyte of
generated Tcl code.

[list_begin definitions]

[call [cmd ::fileutil::magic::filetype] [arg filename]]

This command is similar to the command [cmd fileutil::fileType].

[para]

The output of the command for the specified file is a string
describing the type of the file.

[para]

This list will be empty if the type of the file is not recognized.

[list_end]








|










|
|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

The core part of the recognizer was generated from a "magic(5)" file
containing the checks to perform to recognize files, and associated
file-types.

[para]

[emph Beware!] This recognizer is large, about 752 Kilobyte of
generated Tcl code.

[list_begin definitions]

[call [cmd ::fileutil::magic::filetype] [arg filename]]

This command is similar to the command [cmd fileutil::fileType].

[para]

Returns a list containing a list of descriptions, mimetype and a list file
extensions.  Returns an empty string if the file content is not recognized.

[para]

This list will be empty if the type of the file is not recognized.

[list_end]

Changes to modules/fumagic/filetypes.tcl.

more than 10,000 changes

Changes to modules/fumagic/filetypes.test.

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104

105
106
107




108
109
110
111
112
113
114
115
116
117
118

119







120
121
122
123
124
125
126
127
128
129
130
131
132








133
134


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181

182













183
184
185
} {0 {}}

test fumagic.filetype-1.5 {test elf executable} {
    set f [makeElfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeElfFile
    list $res $msg
} {0 {ELF 32-bit LSB AT&T WE32100 - invalid byte order, relocatable, \(\) \(SYSV\)}}

test fumagic.filetype-1.6 {test simple text} {
    set f [makeTextFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTextFile
    list $res $msg
} {0 {}}

test fumagic.filetype-1.7 {test script file} {
    set f [makeScriptFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeScriptFile
    list $res $msg
} {0 {a /bin/tclsh script text executable}}

test fumagic.filetype-1.8 {test html text} {
    set f [makeHtmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeHtmlFile
    list $res $msg
} {0 {HTML document text}}

# 1.9/.10 possibly broken output.
test fumagic.filetype-1.9 {test xml text} {
    set f [makeXmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlFile
    list $res $msg
} {0 {XML document text \"	XML XML %.3s document text broken XML document text}}

test fumagic.filetype-1.10 {test xml with dtd text} {
    set f [makeXmlDTDFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlDTDFile
    list $res $msg

} {0 {XML document text \"	XML XML %.3s document text broken XML document text}}

test fumagic.filetype-1.11 {test PGP message} {




    set f [makePGPFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePGPFile
    list $res $msg
} {0 {PGP armored data message}}

test fumagic.filetype-1.12 {test binary graphic jpeg} {
    set f [makeJpegFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpegFile
    list $res $msg

} {0 {JPEG image data , JFIF standard  1. %02d , thumbnail 2x 2}}








test fumagic.filetype-1.13 {test binary graphic gif} {
    set f [makeGifFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGifFile
    list $res $msg
} {0 {GIF image data , version 89a,}}

test fumagic.filetype-1.14 {test binary graphic png} {
    set f [makePngFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePngFile
    list $res $msg








} {0 {PNG image data, CORRUPTED, PNG image data, CORRUPTED}}



test fumagic.filetype-1.15 {test binary graphic tiff} {
    set f [makeTiffFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTiffFile
    list $res $msg
} {0 {TIFF image data, big-endian}}

test fumagic.filetype-1.16 {test binary pdf} {
    set f [makePdfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdfFile
    list $res $msg
} {0 {PDF document , version 1 .2}}

test fumagic.filetype-1.17 {test text ps} {
    set f [makePSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePSFile
    list $res $msg
} {0 {PostScript document text}}

test fumagic.filetype-1.18 {test text eps} {
    set f [makeEPSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEPSFile
    list $res $msg
} {0 {PostScript document text}}

test fumagic.filetype-1.19 {test binary gravity_wave_data_frame} {
    set f [makeIgwdFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeIgwdFile
    list $res $msg
} {0 {}}

test fumagic.filetype-1.20 {test binary compressed bzip} {
    set f [makeBzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBzipFile
    list $res $msg
} {0 {bzip2 compressed data bzip compressed data , version: h , compression block size 900k}}

test fumagic.filetype-1.21 {test binary compressed gzip} {
    set f [makeGzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGzipFile
    list $res $msg

} {0 {gzip compressed data , unknown method , ASCII , from MS-DOS}}














testsuiteCleanup
return







|













|






|

<





|






>
|

|
>
>
>
>




|

|




>
|
>
>
>
>
>
>
>






|






>
>
>
>
>
>
>
>
|

>
>





|






|






|






|













|






>
|
>
>
>
>
>
>
>
>
>
>
>
>
>



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
} {0 {}}

test fumagic.filetype-1.5 {test elf executable} {
    set f [makeElfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeElfFile
    list $res $msg
} {0 {{ELF 32-bit LSB {*unknown arch 0x0*} (SYSV)} {} {}}}

test fumagic.filetype-1.6 {test simple text} {
    set f [makeTextFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTextFile
    list $res $msg
} {0 {}}

test fumagic.filetype-1.7 {test script file} {
    set f [makeScriptFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeScriptFile
    list $res $msg
} {0 {{a {/bin/tclsh script text executable}} {} {}}}

test fumagic.filetype-1.8 {test html text} {
    set f [makeHtmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeHtmlFile
    list $res $msg
} {0 {{{HTML document text}} text/html {}}}


test fumagic.filetype-1.9 {test xml text} {
    set f [makeXmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlFile
    list $res $msg
} {0 {{XML {1.0 document text}} text/xml {}}}

test fumagic.filetype-1.10 {test xml with dtd text} {
    set f [makeXmlDTDFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlDTDFile
    list $res $msg
} {0 {{XML {1.0 document text}} text/xml {}}}


test fumagic.filetype-1.11 {
	test PGP message. Their are multiple matches, and the longest match should
	carry greater weight, and thus be the one returnes.  If the match is "PGP
	armored data message", this isn't happening.
} {
    set f [makePGPFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePGPFile
    list $res $msg
} {0 {{{PGP message}} application/pgp {}}}

test fumagic.filetype-1.12.0 {test binary graphic jpeg} {
    set f [makeJpegFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpegFile
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16}} image/jpeg {jpeg jpg jpe jfif}}}

#the result should actually be 128x112, but current magic files indicate "byte" instead of "ubyte"
test fumagic.filetype-1.12.1 {test binary graphic jpeg} {
    set f [makeJpeg2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpeg2File
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16, thumbnail -128x112}} image/jpeg {jpeg jpg jpe jfif}}}

test fumagic.filetype-1.13 {test binary graphic gif} {
    set f [makeGifFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGifFile
    list $res $msg
} {0 {{{GIF image data, version 89a,} {43 x} 64} image/gif {}}}

test fumagic.filetype-1.14 {test binary graphic png} {
    set f [makePngFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePngFile
    list $res $msg
} {0 {{{PNG image data, 0 x} 0, 0-bit} image/png {}}}

#{To do} {implement a "wild guess" mode}
#test fumagic.filetype-1.14.1 {test binary graphic png} {
#    set f [makePngFile]
#    set res [catch {fileutil::magic::filetype $f} msg]
#    removePngFile
#    list $res $msg
#} {0 {PNG image data, CORRUPTED, PNG image data, CORRUPTED}}

# the file doesn't really provide a direntries value, so not sure what the
# result means here, but any number is good enough for this test
test fumagic.filetype-1.15 {test binary graphic tiff} {
    set f [makeTiffFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTiffFile
    list $res $msg
} {0 {{{TIFF image data, big-endian, direntries=19789}} image/tiff {}}}

test fumagic.filetype-1.16 {test binary pdf} {
    set f [makePdfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdfFile
    list $res $msg
} {0 {{{PDF document, version 1.2}} application/pdf {}}}

test fumagic.filetype-1.17 {test text ps} {
    set f [makePSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePSFile
    list $res $msg
} {0 {{{PostScript document text}} application/postscript {}}}

test fumagic.filetype-1.18 {test text eps} {
    set f [makeEPSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEPSFile
    list $res $msg
} {0 {{{PostScript document text}} application/postscript {}}}

test fumagic.filetype-1.19 {test binary gravity_wave_data_frame} {
    set f [makeIgwdFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeIgwdFile
    list $res $msg
} {0 {}}

test fumagic.filetype-1.20 {test binary compressed bzip} {
    set f [makeBzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBzipFile
    list $res $msg
} {0 {{{bzip2 compressed data, block size = 900k}} application/x-bzip2 {}}}

test fumagic.filetype-1.21 {test binary compressed gzip} {
    set f [makeGzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGzipFile
    list $res $msg
} {0 {{{gzip compressed data, reserved method, ASCII, last modified: 1}} application/x-gzip {}}}

test fumagic.filetype-1.22 {test pstring} {
    set f [makewsdlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removewsdlFile
    list $res $msg
} {0 {{{PHP WSDL cache,} {version 0x03, created 7, uri: "hello", source: "some source", target_ns: "and a target"}} {} {}}}
 
test fumagic.filetype-1.23 {regular expressions} {
    set f [makeCSourceFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeCSourceFile
    list $res $msg
} {0 {{{C source text}} text/x-c {}}}

testsuiteCleanup
return

Changes to modules/fumagic/fumagic.testsupport.

25
26
27
28
29
30
31
32
33
34
35
36
37
38

39

40
41
42
43

44
45
46
47
48
49

50
51
52
53
54
55
56
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
}

# ### ### ### ######### ######### #########
## Creates a series of commands for the creation of small data files
## for various file formats.

foreach {name data} [list \
	Empty  {} \
	Bin    "\u0000" \
	Elf    [cat "\x7F" "ELF" "\x01\x01\x01\x00\x00"] \
	Bzip   "BZh91AY&SY\x01\x01\x01\x00\x00" \
	Gzip   "\x1f\x8b\x01\x01\x01\x00\x00" \
	Jpeg   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c"] \

	Gif    "GIF89a\x2b\x00\x40\x00\xf7\xff\x00" \

	Png    "\x89PNG\x00\x01\x02\x01\x01\x2c" \
	Tiff   "MM\x00\*\x00\x01\x02\x01\x01\x2c" \
	Pdf    "%PDF-1.2 \x00\x01\x02\x01\x01\x2c" \
	Igwd   "IGWD\x00\x01\x02\x01\x01\x2c"

	] {
    proc make${name}File   {} [list makeBinaryFile $data $name]
    proc remove${name}File {} [list removeFile           $name]
}

foreach {name data} [list \

	PS     "%!PS-ADOBO-123 EPSF-1.4" \
	EPS    "%!PS-ADOBO-123 EPSF-1.4" \
	Text   "simple text" \
	Script "#!/bin/tclsh" \
	Html   "<html></html>" \
	Xml    $xmlData \
	XmlDTD $xmlDataWithDTD \







|





|
>

>
|


|
>





|
>







25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz
}

# ### ### ### ######### ######### #########
## Creates a series of commands for the creation of small data files
## for various file formats.

foreach {name data} [dict create \
	Empty  {} \
	Bin    "\u0000" \
	Elf    [cat "\x7F" "ELF" "\x01\x01\x01\x00\x00"] \
	Bzip   "BZh91AY&SY\x01\x01\x01\x00\x00" \
	Gzip   "\x1f\x8b\x01\x01\x01\x00\x00" \
	Jpeg   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c"] \
	Jpeg2   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c\x80\x70"] \
	Gif    "GIF89a\x2b\x00\x40\x00\xf7\xff\x00" \
	Png    "\x89PNG\x0D\x0A\x1A\x0A" \
	PngMalformed "\x89PNG\x00\x01\x02\x01\x01\x2c" \
	Tiff   "MM\x00\*\x00\x01\x02\x01\x01\x2c" \
	Pdf    "%PDF-1.2 \x00\x01\x02\x01\x01\x2c" \
	Igwd   "IGWD\x00\x01\x02\x01\x01\x2c" \
	wsdl "wsdl\x03 \x07\x00\x00\x00\x05\x00\x00\x00hello\x0b\x00\x00\x00some source\x0c\x00\x00\x00and a targetxxxmore text" 
	] {
    proc make${name}File   {} [list makeBinaryFile $data $name]
    proc remove${name}File {} [list removeFile           $name]
}

foreach {name data} [dict create \
	CSource "#include <stdio.h>\nint main(int argc, char *argv[]) {int a;}" \
	PS     "%!PS-ADOBO-123 EPSF-1.4" \
	EPS    "%!PS-ADOBO-123 EPSF-1.4" \
	Text   "simple text" \
	Script "#!/bin/tclsh" \
	Html   "<html></html>" \
	Xml    $xmlData \
	XmlDTD $xmlDataWithDTD \

Changes to modules/fumagic/pkgIndex.tcl.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
if {![package vsatisfies [package provide Tcl] 8.4]} {return}

# Recognizers
package ifneeded fileutil::magic::filetype 1.0.2 [list source [file join $dir filetypes.tcl]]
package ifneeded fileutil::magic::mimetype 1.0.2 [list source [file join $dir mimetypes.tcl]]

# Runtime
package ifneeded fileutil::magic::rt 1.0 [list source [file join $dir rtcore.tcl]]

# Compiler packages
package ifneeded fileutil::magic::cgen   1.0 [list source [file join $dir cgen.tcl]]
package ifneeded fileutil::magic::cfront 1.0 [list source [file join $dir cfront.tcl]]



|


|



|


|
|



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
if {![package vsatisfies [package provide Tcl] 8.6]} {return}

# Recognizers
package ifneeded fileutil::magic::filetype 1.1.2 [list source [file join $dir filetypes.tcl]]
package ifneeded fileutil::magic::mimetype 1.0.2 [list source [file join $dir mimetypes.tcl]]

# Runtime
package ifneeded fileutil::magic::rt 1.2 [list source [file join $dir rtcore.tcl]]

# Compiler packages
package ifneeded fileutil::magic::cgen   1.2 [list source [file join $dir cgen.tcl]]
package ifneeded fileutil::magic::cfront 1.2 [list source [file join $dir cfront.tcl]]



Changes to modules/fumagic/rtcore.man.

24
25
26
27
28
29
30








31
32
33
34
35
36
37
[package fileutil::magic::filetypes], and the two engine compiler
packages [package fileutil::magic::cgen] and
[package fileutil::magic::cfront].

[section COMMANDS]

[list_begin definitions]









[call [cmd ::fileutil::magic::rt::open] [arg filename]]

This command initializes the runtime and prepares the file
[arg filename] for use by the system.

This command has to be invoked first, before any other command of this







>
>
>
>
>
>
>
>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
[package fileutil::magic::filetypes], and the two engine compiler
packages [package fileutil::magic::cgen] and
[package fileutil::magic::cfront].

[section COMMANDS]

[list_begin definitions]

[call [cmd ::fileutil::magic::rt::>]] 

Shorthand for [cmd incr level].

[call [cmd ::fileutil::magic::rt::<]] 

Shorthand for [cmd incr level -1].

[call [cmd ::fileutil::magic::rt::open] [arg filename]]

This command initializes the runtime and prepares the file
[arg filename] for use by the system.

This command has to be invoked first, before any other command of this
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

[para]

The special comparison operator [const x] signals that no comparison
should be done, or, in other words, that the fetched value will always
match [arg val].

[call [cmd ::fileutil::magic::rt::Nvx] [arg atlevel] [arg type] [arg offset] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::Nv], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the level [arg atlevel], for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::Nx] [arg atlevel] [arg type] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the level [arg atlevel], for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::S] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it fetches and compares strings, not numeric data. The fetched value
is also stored in the internal string buffer instead of the numeric
buffer.

[call [cmd ::fileutil::magic::rt::Sx] [arg atlevel] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::S], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the level [arg atlevel], for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::L] [arg newlevel]]

This command sets the current level in the calling context to
[arg newlevel]. The command returns the empty string as its result.








|



|


|



|









|



|







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194

[para]

The special comparison operator [const x] signals that no comparison
should be done, or, in other words, that the fetched value will always
match [arg val].

[call [cmd ::fileutil::magic::rt::Nvx] [arg type] [arg offset] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::Nv], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current level, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::Nx] [arg type] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::S] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it fetches and compares strings, not numeric data. The fetched value
is also stored in the internal string buffer instead of the numeric
buffer.

[call [cmd ::fileutil::magic::rt::Sx] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::S], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current level, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::L] [arg newlevel]]

This command sets the current level in the calling context to
[arg newlevel]. The command returns the empty string as its result.

202
203
204
205
206
207
208




209
210
211
212
213
214
215

[para]

In other words, the command computes an absolute location in the file
based on the relative [arg offset] and returns it as its result. The
base the offset is added to is the last location remembered for the
level in the calling context.





[list_end]

[section {NUMERIC TYPES}]

[list_begin definitions]
[def [const byte]]    8-bit integer







>
>
>
>







210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227

[para]

In other words, the command computes an absolute location in the file
based on the relative [arg offset] and returns it as its result. The
base the offset is added to is the last location remembered for the
level in the calling context.

[call [cmd ::fileutil::magic::rt::U] [arg fileindex] [arg name]]

Use a named test script at the current level.

[list_end]

[section {NUMERIC TYPES}]

[list_begin definitions]
[def [const byte]]    8-bit integer

Changes to modules/fumagic/rtcore.tcl.

1
2
3
4

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

23


24
25





26







27







28

29












30
31
32
33
34
35
36
37
38
39
40
41
42
43
44








45
46

47

48
49
50
51
52
53
54
55
56






57
58
59
60
61
62
63
64
65
66
67






























68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88

89
90
91
92
93
94
95
96
97
98
99
100

101
102

103


104





105
106
107
108
109
110
111

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150

151
152
153
154
155
156
157


158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251












































252

253
254



255
256
257


258
259
260
261







262


263
264
265

266
267
268
269
270
271




272

273
274
275
276







277

278
279




280





281


282
283
284
285


286
287
288







289

290

291










292





293
294
295
296


297







298

299
300
301






302


303









304














305







306



307

308



309
310


311
312
313
314
315
316


317




318

319
320














321


322
323
324




325
326
327

328
329
330

331







332
333



334



335

336













337
338
339





340
341



342

343



344




345





346

347


348


349


350

351





352

353


354
355
356
357
358
359
360
361
362
363
364
365
366
367




368


369


370



371
372
373
374
375
376
377








378
379
380
381
382
383
384
385
386


387
388

389
390
391


392
393
394
395
396
397
398
399
400
401










402
403
404
405




406

407

408
409
410
411
412
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
# rtcore.tcl --
#
#	Runtime core for file type recognition engines written in pure Tcl.
#

# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: rtcore.tcl,v 1.5 2005/09/28 04:51:19 andreas_kupries Exp $

#####
#
# "mime type recognition in pure tcl"
# http://wiki.tcl.tk/12526
#
# Tcl code harvested on:  10 Feb 2005, 04:06 GMT
# Wiki page last updated: ???
#
#####


# TODO - Required Functionality:



# implement full offset language





# implement pstring (pascal string, blerk)







# implement regex form (blerk!)







# implement string qualifiers














# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.4

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::rt {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0









    # Runtime state.


    variable fd     {}     ; # Channel to file under scrutiny

    variable strbuf {}     ; # Input cache [*].
    variable cache         ; # Cache of fetched and decoded numeric
    array set cache {}	   ; # values.
    variable result {}     ; # Accumulated recognition result.
    variable string {}     ; # Last recognized string | For substitution
    variable numeric -9999 ; # Last recognized number | into the message

    variable  last         ; # Behind last fetch locations,
    array set last {}      ; # per nesting level.







    # [*] The vast majority of magic strings are in the first 4k of the file.

    # Export APIs (full public, recognizer public)
    namespace export open close file_start result
    namespace export emit offset Nv N S Nvx Nx Sx L R I
}

# ### ### ### ######### ######### #########
## Public API, general use.































# open the file to be scanned
proc ::fileutil::magic::rt::open {file} {
    variable result {}
    variable string {}
    variable numeric -9999
    variable strbuf
    variable fd
    variable cache

    set fd [::open $file]
    ::fconfigure $fd -translation binary

    # fill the string cache
    set strbuf [::read $fd 4096]


    # clear the fetch cache
    catch {unset cache}
    array set cache {}

    return $fd
}


proc ::fileutil::magic::rt::close {} {
    variable fd
    ::close $fd
    return
}

# mark the start of a magic file in debugging
proc ::fileutil::magic::rt::file_start {name} {
    ::fileutil::magic::rt::Debug {puts stderr "File: $name"}
}


# return the emitted result
proc ::fileutil::magic::rt::result {{msg ""}} {

    variable result


    if {$msg ne ""} {emit $msg}





    return -code return $result
}

proc ::fileutil::magic::rt::resultv {{msg ""}} {
    variable result
    if {$msg ne ""} {emit $msg}
    return $result

}

# ### ### ### ######### ######### #########
## Public API, for use by a recognizer.

# emit a message
proc ::fileutil::magic::rt::emit {msg} {
    variable string
    variable numeric
    variable result

    set map [list \
	    \\b "" \
		%c [format %c $numeric] \
	    %s  $string \
	    %ld $numeric \
	    %d  $numeric \
	    ]

    lappend result [::string map $map $msg]
    return
}

# handle complex offsets - TODO
proc ::fileutil::magic::rt::offset {where} {
    ::fileutil::magic::rt::Debug {puts stderr "OFFSET: $where"}
    return 0
}

proc ::fileutil::magic::rt::Nv {type offset {qual ""}} {
    variable typemap
    variable numeric

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field from the file
    set numeric [Fetch $offset $size $scan]


    if {$qual ne ""} {
	# there's a mask to be applied
	set numeric [expr $numeric $qual]
    }

    ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $qual: $numeric"}
    return $numeric


}

# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::N {type offset comp val {qual ""}} {
    variable typemap
    variable numeric

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field
    set numeric [Fetch $offset $size $scan]

    # Would moving this before the fetch an optimisation ? The
    # tradeoff is that we give up filling the cache, and it is unclear
    # how often that value would be used. -- Profile!
    if {$comp eq "x"} {
	# anything matches - don't care
	return 1
    }

    # get value in binary form, then back to numeric
    # this avoids problems with sign, as both values are
    # [binary scan]-converted identically
    binary scan [binary format $scan $val] $scan val

    if {$qual ne ""} {
	# there's a mask to be applied
	set numeric [expr $numeric $qual]
    }

    # perform comparison
    set c [expr $val $comp $numeric]

    ::fileutil::magic::rt::Debug {puts stderr "numeric $type: $val $comp $numeric / $qual - $c"}
    return $c
}

proc ::fileutil::magic::rt::S {offset comp val {qual ""}} {
    variable fd
    variable string

    # convert any backslashes
    set val [subst -nocommands -novariables $val]

    if {$comp eq "x"} {
	# match anything - don't care, just get the value
	set string ""

	# Query: Can we use GetString here ?
	# Or at least the strbuf cache ?


	# move to the offset
	::seek $fd $offset
	while {
	    ([::string length $string] < 100) &&
	    [::string is print [set c [::read $fd 1]]]
	} {
	    if {[::string is space $c]} {
		break
	    }
	    append string $c
	}

	return 1
    }

    # get the string and compare it
    set string [GetString $offset [::string length $val]]
    set cmp    [::string compare $val $string]
    set c      [expr $cmp $comp 0]

    ::fileutil::magic::rt::Debug {
	puts "String '$val' $comp '$string' - $c"
	if {$c} {
	    puts "offset $offset - $string"
	}
    }
    return $c
}

proc ::fileutil::magic::rt::Nvx {atlevel type offset {qual ""}} {
    variable typemap
    variable numeric
    variable last

    upvar 1 level l
    set  l $atlevel

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field from the file












































    set numeric [Fetch $offset $size $scan]


    set last($atlevel) [expr {$offset + $size}]




    if {$qual ne ""} {
	# there's a mask to be applied


	set numeric [expr $numeric $qual]
    }

    ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $qual: $numeric"}







    return $numeric


}

# Numeric - get bytes of $type at $offset and $compare to $val

# qual might be a mask
proc ::fileutil::magic::rt::Nx {atlevel type offset comp val {qual ""}} {
    variable typemap
    variable numeric
    variable last





    upvar 1 level l

    set  l $atlevel

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break









    set last($atlevel) [expr {$offset + $size}]





    # fetch the numeric field





    set numeric [Fetch $offset $size $scan]



    if {$comp eq "x"} {
	# anything matches - don't care
	return 1


    }

    # get value in binary form, then back to numeric







    # this avoids problems with sign, as both values are

    # [binary scan]-converted identically

    binary scan [binary format $scan $val] $scan val
















    if {$qual ne ""} {
	# there's a mask to be applied
	set numeric [expr $numeric $qual]
    }










    # perform comparison

    set c [expr $val $comp $numeric]

    ::fileutil::magic::rt::Debug {puts stderr "numeric $type: $val $comp $numeric / $qual - $c"}






    return $c


}
























proc ::fileutil::magic::rt::Sx {atlevel offset comp val {qual ""}} {







    variable fd



    variable string

    variable last




    upvar 1 level l


    set  l $atlevel

    # convert any backslashes
    set val [subst -nocommands -novariables $val]

    if {$comp eq "x"} {


	# match anything - don't care, just get the value




	set string ""


	# Query: Can we use GetString here ?














	# Or at least the strbuf cache ?



	# move to the offset
	::seek $fd $offset




	while {
	    ([::string length $string] < 100) &&
	    [::string is print [set c [::read $fd 1]]]

	} {
	    if {[::string is space $c]} {
		break

	    }







	    append string $c
	}







	set last($atlevel) [expr {$offset + [string length $string]}]















	return 1
    }






    set len [::string length $val]
    set last($atlevel) [expr {$offset + $len}]





    # get the string and compare it



    set string [GetString $offset $len]




    set cmp    [::string compare $val $string]





    set c      [expr $cmp $comp 0]




    ::fileutil::magic::rt::Debug {


	puts "String '$val' $comp '$string' - $c"


	if {$c} {

	    puts "offset $offset - $string"





	}

    }


    return $c
}
proc ::fileutil::magic::rt::L {newlevel} {
    # Regenerate level information in the calling context.
    upvar 1 level l ; set l $newlevel
    return
}

proc ::fileutil::magic::rt::I {base type delta} {
    # Handling of base locations specified indirectly through the
    # contents of the inspected file.

    variable typemap
    foreach {size scan} $typemap($type) break




    return [expr {[Fetch $base $size $scan] + $delta}]


}






proc ::fileutil::magic::rt::R {base} {
    # Handling of base locations specified relative to the end of the
    # last field one level above.

    variable last   ; # Remembered locations.
    upvar 1 level l ; # The level to get data from.
    return [expr {$last($l) + $base}]








}

# ### ### ### ######### ######### #########
## Internal. Retrieval of the data used in comparisons.

# fetch and cache a numeric value from the file
proc ::fileutil::magic::rt::Fetch {where what scan} {
    variable cache
    variable numeric


    variable fd


    if {![info exists cache($where,$what,$scan)]} {
	::seek $fd $where
	binary scan [::read $fd $what] $scan numeric


	set cache($where,$what,$scan) $numeric

	# Optimization: If we got 4 bytes, i.e. long we implicitly
	# know the short and byte data as well. Should put them into
	# the cache. -- Profile: How often does such an overlap truly
	# happen ?

    } else {
	set numeric $cache($where,$what,$scan)
    }










    return $numeric
}

proc ::fileutil::magic::rt::GetString {offset len} {




    # We have the first 1k of the file cached

    variable string

    variable strbuf
    variable fd

    set end [expr {$offset + $len - 1}]
    if {$end < 4096} {
	# in the string cache, copy the requested part.
	set string [::string range $strbuf $offset $end]
    } else {
	# an unusual one, move to the offset and read directly from
	# the file.
	::seek $fd $offset
	set string [::read $fd $len]
    }

    return $string
}

# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::rt::debug} {




>


















>
|
>
>
|
<
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>



|











>
>
>
>
>
>
>
>


>

>




<
<
|


>
>
>
>
>
>





|





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
<






|


>







>












>

|
>

>
>
|
>
>
>
>
>
|


|
|
<
|
>





|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
<
<
|
<
<

>
|
|
<
<
|
<
|
>
>
|
|
<
<
<
<
<
|
|
<
|
<
<
|
<
<
<
<
<
<
<
|
<
<
<
<

<
<
<
<
|
<
<
|
<
<
|
|
<
<
<

|
<
|
<
<
|
|
<
<
>
|
|
<
<
<
<
<
<
<
|
<
<
|
<
<
|
<
|
<
<
|
<
<
|
|
|
<
|


|

|
|
<
<
<





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>

<
>
>
>

<
|
>
>
|


<
>
>
>
>
>
>
>
|
>
>
|
|
|
>
|
|
<
<
<
|
>
>
>
>
|
>
|
|
|
|
>
>
>
>
>
>
>
|
>
|
|
>
>
>
>
|
>
>
>
>
>
|
>
>
|
|
|
|
>
>

|
|
>
>
>
>
>
>
>
|
>
|
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
|
<
|
|
>
>
|
>
>
>
>
>
>
>
|
>
|
|
<
>
>
>
>
>
>
|
>
>
|
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
>
|
>
|
>
>
>
|
|
>
>
|
|
<
<

|
>
>
|
>
>
>
>
|
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
|
|
>
>
>
>
|
|
|
>
|
|
<
>
|
>
>
>
>
>
>
>
|
|
>
>
>
|
>
>
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|

>
>
>
>
>
|
|
>
>
>

>
|
>
>
>
|
>
>
>
>
|
>
>
>
>
>
|
>

>
>

>
>
|
>
>
|
>
|
>
>
>
>
>
|
>
|
>
>
|







|


<

|
>
>
>
>
|
>
>
|
>
>
|
>
>
>
|





|
>
>
>
>
>
>
>
>








|
>
>


>


|
>
>
|







|

>
>
>
>
>
>
>
>
>
>



|
>
>
>
>
|
>
|
>













>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96


97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150

151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

199
200
201
202
203
204
205
206
207
208
209
210
211



















212
213
214


215


216
217
218
219


220

221
222
223
224
225





226
227

228


229







230




231




232


233


234
235



236
237

238


239
240


241
242
243







244


245


246

247


248


249
250
251

252
253
254
255
256
257
258



259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310

311
312
313
314

315
316
317
318
319
320

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409

410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425

426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483


484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625

626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
# rtcore.tcl --
#
#	Runtime core for file type recognition engines written in pure Tcl.
#
# Copyright (c) 2016      Poor Yorick     <[email protected]>
# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: rtcore.tcl,v 1.5 2005/09/28 04:51:19 andreas_kupries Exp $

#####
#
# "mime type recognition in pure tcl"
# http://wiki.tcl.tk/12526
#
# Tcl code harvested on:  10 Feb 2005, 04:06 GMT
# Wiki page last updated: ???
#
#####

#TODO  {
#    {Required Functionality} {
#	{implement full offset language} {
#	    done
#

#	    by pooryorick
#
#	    time {2016 06}
#	}
#
#	{implement pstring (pascal string, blerk)} {
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#}
#
#	{implement regex form (blerk!)} {
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#	}
#
#	{implement string qualifiers} {
#	    done
#	    
#	    by pooryorick
#
#	    time {2016 06}
#	}
#
#	{Maybe distinguish between binary and text tests, like file(n)}
#	
#	{process and use strength directives}
#
#    }
#}

# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.6

# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::rt {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0

    # The maximum size of a substring to inspect from the file in question 
    variable maxstring 64

    # The maximum length of any %s substitution in a resulting description is
    variable maxpstring 64

    variable regexdefaultlen 4096

    # Runtime state.

    variable cursor 0      ; # The current offset
    variable fd     {}     ; # Channel to file under scrutiny
    variable found 0       ; # Whether the last test produced a match
    variable strbuf {}     ; # Input cache [*].
    variable cache         ; # Cache of fetched and decoded numeric
    array set cache {}	   ; # values.
    variable result {}     ; # Accumulated recognition result.


    variable extracted     ; # The value extracted for inspection
    variable  last         ; # Behind last fetch locations,
    array set last {}      ; # per nesting level.
    variable weight 0      ; # The weight of the current part. 
                           ; # Basically string length of the contributing of
			   ; # the potentially-matching part.

    variable weighttotal 0 ; # The aggregate weight of the matching components of
			   ; # the current test.

    # [*] The vast majority of magic strings are in the first 4k of the file.

    # Export APIs (full public, recognizer public)
    namespace export open close file_start result
    namespace export emit ext mime offset Nv N S Nvx Nx Sx L R I resultv U < >
}

# ### ### ### ######### ######### #########
## Public API, general use.

proc ::fileutil::magic::rt::> {} {
    upvar level level
    incr level
}

proc ::fileutil::magic::rt::< {} {
    upvar level level
    incr level -1
}

proc ::fileutil::magic::rt::classify {data} {
    set bin_rx {[\x00-\x08\x0b\x0e-\x1f]}
    if {[regexp $bin_rx $data] } {
        return binary
    } else {
        return text
    }
}

proc ::fileutil::magic::rt::mime value {
    upvar 1 mime mime
    set mime $value
}

proc ::fileutil::magic::rt::ext value {
    upvar 1 ext ext
    set ext $value
}


# open the file to be scanned
proc ::fileutil::magic::rt::open {file} {
    variable result {}
    variable extracted {} 

    variable strbuf
    variable fd
    variable cache

    set fd [::open $file]
    ::fconfigure $fd -translation binary
        
    # fill the string cache
    set strbuf [::read $fd 4096]
	set class [classify $strbuf]

    # clear the fetch cache
    catch {unset cache}
    array set cache {}

    return $fd
}


proc ::fileutil::magic::rt::close {} {
    variable fd
    ::close $fd
    return
}

# mark the start of a magic file in debugging
proc ::fileutil::magic::rt::file_start {name} {
    ::fileutil::magic::rt::Debug {puts stderr "File: $name"}
}


# return the emitted result
proc ::fileutil::magic::rt::result {{msg {}}} {
    variable found
    variable result
    variable weight
    variable weighttotal
    if {$msg ne {}} {emit $msg}
    set res [list $found $weighttotal $result]
    set found 0
    set weight 0
    set weighttotal 0
    set result {}
    return -code return $res 
}

proc ::fileutil::magic::rt::resultv {{msg {}}} {
    try result on return result {

	return $result
    }
}

# ### ### ### ######### ######### #########
## Public API, for use by a recognizer.

# emit a description 
proc ::fileutil::magic::rt::emit msg {
    variable found
    variable maxpstring
    variable extracted
    variable result



















    variable weight
    variable weighttotal
    set found 1


    incr weighttotal $weight



    #set map [list \
    #    \\b "" \
    #    %c [apply {extracted {


    #        if {[catch {format %c $extracted} result]} {

    #    	return {}
    #        }
    #        return $result

    #    }} $extracted] \





    #    %s  [string trim [string range $extracted 0 $maxpstring]] \
    #    %ld $extracted \

    #    %d  $extracted \


    #]







    #[::string map $map $msg]









    # {to do} {Is only taking up to the first newline really a good general rule?}


    regexp {\A[^\n\r]*} $extracted extracted2



    regsub -all {\s+} $extracted2 { } extracted2




    set arguments {}

    set count [expr {[string length $msg] - [string length [


	string map {% {}} $msg]]}]
    for {set i 0} {$i < $count} {incr i} {


	lappend arguments $extracted2
    }
    catch {set msg [format $msg {*}$arguments]}










    # Assumption: [regexp] leaves $msg untouched if it fails


    regexp {\A(\b|\\b)?(.*)$} $msg match b msg

    if {$b ne {} && [llength $result]} {


	lset result end [lindex $result end]$msg


    } else {
	lappend result $msg
    }

    return
}

proc ::fileutil::magic::rt::Nv {type offset compinvert mod mand} {
    variable typemap
    variable extracted
    variable weight




    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field from the file
    set extracted [Fetch $offset $size $scan]

    if {$compinvert && $extracted ne {}} {
	set extracted [expr ~$extracted]
    }
    if {$mod ne {} && $extracted ne {}} {
	# there's a mask to be applied
	set extracted [expr $extracted $mod $mand]
    }

    ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $mod: $extracted"}
    set weight [string length $extracted]
    return $extracted
}

proc ::fileutil::magic::rt::use {named file name} {
    if [dict exists $named $file $name] {
	set script [dict get $named $file $name]
    } else {
	dict for {file val} $named {
	    if {[dict exists $val $name]} {
		set script [dict get $val $name]
		break
	    }
	}
    }
    if {![info exists script]} {
	return -code error [list {name not found} $key]
    }
    return $script
}

# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::N {
    type offset testinvert compinvert mod mand comp val} {
    variable typemap
    variable extracted
    variable weight

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field
    set extracted [Fetch $offset $size $scan]
    if {$extracted eq {}} {


	# Rules like the following, from the jpeg file, imply that
	# in the absence of an extracted value, a numerical value of 
	# 0 should be used


	# From jpeg:
	    ## Next, show thumbnail info, if it exists:
	    #>>18    byte        !0      \b, thumbnail %dx
	set extracted 0
    }


    # Would moving this before the fetch an optimisation ? The
    # tradeoff is that we give up filling the cache, and it is unclear
    # how often that value would be used. -- Profile!
    if {$comp eq {x}} {
	set weight 0
	# anything matches - don't care
	if {$testinvert} {
	    return 0
	} else {
	    return 1
	}
    }

    if {[string match $scan *me]} {
	set data [me4 $data]
	set scan I 



    }
    # get value in binary form, then back to numeric
    # this avoids problems with sign, as both values are
    # [binary scan]-converted identically (see [treegen1])
    binary scan [binary format $scan $val] $scan val

    if {$compinvert && $extracted ne {}} {
	set extracted [expr ~$extracted]
    }

    # perform comparison
    if {$mod ne {}} {
	# there's a mask to be applied
	set extracted [expr $extracted $mod $mand]
    }
    switch $comp {
	& {
	    set c [expr {($extracted & $val) == $val}]
	}
	^ {
	    set c [expr {($extracted & ~$val) == $extracted}]
	}
	== - != - < - > {
	    set c [expr $extracted $comp $val]
	}
	default {
	    #Should never reach this
	    return -code error [list {unknown comparison operator} $comp]
	}
    }
    # Do this last to minimize shimmering
    set weight [string length $extracted]

    ::fileutil::magic::rt::Debug {
	puts stderr "numeric $type: $val $t$comp $extracted / $mod - $c"
    }
    if {$testinvert} {
	set c [expr {!$c}]
	return $c 
    } else {
	return $c
    }
}

proc ::fileutil::magic::rt::S {type offset testinvert mod mand comp val} {
    variable cursor
    variable extracted
    variable fd
    variable maxstring
    variable regexdefaultlen
    variable weight

    # $compinvert is currently ignored for strings

    set weight [string length $val]

    switch $type {
	pstring {
	    set ptype B
	    set vincluded 0
	    # The last pstring type specifier wins 
	    foreach item $mod {
		if {$item eq {J}} {
		    set vincluded 1
		} else {
		    set ptype $item
		}
	    }
	    lassign [dict get {B {b 1} H {S 2} h {s 2} L {I 4} l {i 4}} $ptype] scan slength
	    set length [GetString $offset $slength]
	    set offset $cursor 
	    binary scan $length ${scan}u length
	    if {$vincluded} {

		set length [expr {$length - $slength}]
	    }
	    set extracted [GetString $offset $length]
	    set c [Smatch $val $comp $extracted $mod]
	}
	regex {
	    if {$mand eq {}} {
		set mand $regexdefaultlen 
	    }
	    set extracted [GetString $offset $mand]
	    if {[regexp $val $extracted match]} {
		set weight [string length $match]
	        set c 1
	    } else {
	        set c 0
	    }

	}
	search {
	    set limit $mand
	    set extracted [GetString $offset $limit]
	    if {[string first $val $extracted] >= 0} {
		set weight [string length $val]
		set c 1
	    } else {
		set c 0
	    }
	} default {
	    # get the string and compare it
	    switch $type bestring16 - lestring16 {
		set extracted [GetString $offset $maxstring]
		set extracted [string range $extracted 0 1]
		switch $type bestring16 {
		    set extracted [binary scan $extracted Su]
		} lestring16 {
		    set extracted [binary scan $extracted Su]
		}
		set extracted [format %c $extracted]
	    } default {
		# If $val is 0, give [emit] something to work with .
		if {$val eq  "\0"} {
		    set extracted [GetString $offset $maxstring]
		} else {
		    set extracted [GetString $offset [string length $val]]
		}
	    }
	    set c [Smatch $val $comp $extracted $mod]
	}
    }


    ::fileutil::magic::rt::Debug {
	puts "String '$val' $comp '$extracted' - $c"
	if {$c} {
	    puts "offset $offset - $extracted"
	}
    }
    if {$testinvert} {
	return [expr {!$c}]
    } else {
	return $c
    }
}

proc ::fileutil::magic::rt::Smatch {val op string mod} {
    variable weight
    if {$op eq {x}} {
	set weight 0
	return 1
    }

    if {![string length $string]} {
	# Nothing matches an empty $string.
	return 0
    }



    if {$op eq {>} && [string length $val] > [string length $string]} {
	return 1
    }

    # To preserve the semantics, the w operation must occur prior to the W
    # operation (Assuming the interpretation that w makes all whitespace
    # optional, relazing the requirements of W) .
    if {{w} in $mod} {
	regsub -all {\s} $string[set string {}] {} string
	regsub -all {\s} $val[set val {}] {} val
    }

    if {{W} in $mod} {
	set blanklen [::tcl::mathfunc::max 0 {*}[
	    lmap blanks [lrange [regexp -all -inline {(\s+)} $val] 1 end] {
	    expr {[$lindex blanks 1] - [$lindex blanks 0]}
	}]]
	if {![regexp "\s{$blanklen}" $string]} {
	    ::fileutil::magic::rt::Debug {
		puts "String '$val' $op '$string' - $c"
		if {$c} {
		    puts "offset $offset - $string"
		}
	    }
	    return 0
	}

	regsub -all {\s+} $string[set string {}] { } string
	regsub -all {\s+} $val[set val {}] { } val
    }


    if {{T} in $mod} {
	set string [string trim $string[set string {}]]
	set val [string tolower $val[set val {}]]
    }

    set string [string range $string  0 [string length $val]-1]

    # The remaining code may assume that $string and $val have the same length
    # .


    set opnum [dict get {< -1 == 0 eq 0 != 0 ne 0 > 1} $op]

    if {{c} in $mod || {C} in $mod} {
	if {{c} in $mod && {C} in $mod} {
	    set string [string tolower $string[set string {}]]
	    set val [string tolower $val[set val {}]]
	} elseif {{c} in $mod} {
	    foreach sc [split $string] vc [split $val] {
		if {[string is lower $sc]} {
		    set vc [string tolower $vc]
		}
		if {[::string compare $val $string] != $opnum} {
		    set res 0
		    break
		}
	    }
	} elseif {{C} in $mode} {
	    foreach vc [split $val] sc [split $string]  {
		if {[string is upper $vc]} {
		    set sc [string toupper $sc]
		}
		if {[::string compare $val $string] != $opnum} {
		    set res 0
		    break
		}
	    }
	}
    } else {
	set res [expr {[::string compare $string $val] == $opnum}]
    }
    if {$op in {!= ne}} {
	set res [expr {!$res}]
    }
    set weight [string length $val]
    return $res
}

proc ::fileutil::magic::rt::Nvx {type offset compinvert mod mand} {
    variable typemap
    variable extracted
    variable last
    variable weight

    upvar 1 level l
    # unpack the type characteristics
    foreach {size scan} $typemap($type) break
    set last($l) [expr {$offset + $size}]

    set extracted [Nv $type $offset $compinvert $mod $mand]

    ::fileutil::magic::rt::Debug {puts stderr "NVx $type $offset $extracted $mod $mand"}
    return $extracted
}

# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::Nx {
    type offset testinvert compinvert mod mand comp val} {

    variable cursor
    variable typemap
    variable extracted
    variable last
    variable weight

    upvar 1 level l

    set res [N $type $offset $testinvert $compinvert $mod $mand $comp $val]

    ::fileutil::magic::rt::Debug {
	puts stderr "Nx numeric $type: $val $comp $extracted / $qual - $c"
    }
    set last($l) $cursor
    return $res
}

proc ::fileutil::magic::rt::Sx {
    type offset testinvert mod mand comp val} {
    variable cursor
    variable extracted
    variable fd
    variable last
    variable weight

    upvar 1 level l

    set res [S $type $offset $testinvert $mod $mand $comp $val]
    set last($l) $cursor
    return $res
}
proc ::fileutil::magic::rt::L {newlevel} {
    # Regenerate level information in the calling context.
    upvar 1 level l ; set l $newlevel
    return
}

proc ::fileutil::magic::rt::I {offset it ioi ioo iir io} {
    # Handling of base locations specified indirectly through the
    # contents of the inspected file.

    variable typemap
    foreach {size scan} $typemap($it) break
    if {$iir} {
	set io [Fetch [expr $offset + $io] $size $scan]
    }
    set data [Fetch [expr $offset $ioo $io] $size $scan]

    if {$ioi} {
	set data [expr {~$data}]
    }
    if {$ioo ne {}} {
	set data [expr $data $ioo $io]
    }
    return $data
}

proc ::fileutil::magic::rt::R base {
    # Handling of base locations specified relative to the end of the
    # last field one level above.

    variable last   ; # Remembered locations.
    upvar 1 level l ; # The level to get data from.
    return [expr {$last([expr {$l-1}]) + $base}]
}


proc ::fileutil::magic::rt::U {file name} {
    upvar level l
    upvar named named
    set script [use $named $file $name]
    tailcall ::try $script
}

# ### ### ### ######### ######### #########
## Internal. Retrieval of the data used in comparisons.

# fetch and cache a numeric value from the file
proc ::fileutil::magic::rt::Fetch {where what scan} {
    variable cache
    variable cursor
    variable extracted
    variable strbuf
    variable fd

    # {to do} id3 length
    if {![info exists cache($where,$what,$scan)]} {
	::seek $fd $where
	set data [::read $fd $what]
	incr cursor [string length $data]
	set extracted [rtscan $data $scan]
	set cache($where,$what,$scan) [list $extracted $cursor]

	# Optimization: If we got 4 bytes, i.e. long we implicitly
	# know the short and byte data as well. Should put them into
	# the cache. -- Profile: How often does such an overlap truly
	# happen ?

    } else {
	lassign $cache($where,$what,$scan) extracted cursor
    }
    return $extracted
}

proc ::fileutil::magic::rt::rtscan {data scan} {
    if {$scan eq {me}} {
	set data [me4 $data]
	set scan I 
    }
    set numeric {}
    binary scan $data $scan numeric
    return $numeric
}

proc ::fileutil::magic::rt::me4 data {
	binary scan $data a4 chars
	set data [binary format a4 [lindex $chars 1] [
	lindex $chars 0] [lindex $chars 3] [lindex $chars 2]]
}

proc ::fileutil::magic::rt::GetString {offset len} {
    variable cursor
    # We have the first 1k of the file cached
    variable strbuf
    variable fd

    set end [expr {$offset + $len - 1}]
    if {$end < 4096} {
	# in the string cache, copy the requested part.
	set string [::string range $strbuf $offset $end]
    } else {
	# an unusual one, move to the offset and read directly from
	# the file.
	::seek $fd $offset
	set string [::read $fd $len]
    }
    set cursor [expr {$offset + [string length $string]}]
    return $string
}

# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::rt::debug} {
455
456
457
458
459
460
461
462
463
464

465
466
467
468















469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488



489
490


491
492


493
494
495
496
497
498
499
500
501
    variable typemap
    global tcl_platform

    # Set the definitions for all types which have their endianess
    # explicitly specified n their name.

    array set typemap {
	byte    {1 c}  ubyte    {1 c}
	beshort {2 S}  ubeshort {2 S}
	leshort {2 s}  uleshort {2 s}

	belong  {4 I}  ubelong  {4 I}
	lelong  {4 i}  ulelong  {4 i}  
	bedate  {4 S}  ledate   {4 s}
	beldate {4 I}  leldate  {4 i}
















	long  {4 Q} ulong  {4 Q} date  {4 Q} ldate {4 Q}
	short {2 Y} ushort {2 Y}
    }

    # Now set the definitions for the types without explicit
    # endianess. They assume/use 'native' byteorder. We also put in
    # special forms for the compiler, so that it can use short names
    # for the native-endian types as well.

    # generate short form names
    foreach {n v} [array get typemap] {
	foreach {len scan} $v break
	#puts stderr "Adding $scan - [list $len $scan]"
	set typemap($scan) [list $len $scan]
    }

    # The special Q and Y short forms are incorrect, correct now to
    # use the proper native endianess.




    if {$tcl_platform(byteOrder) eq "littleEndian"} {
	array set typemap {Q {4 i} Y {2 s}}


    } else {
	array set typemap {Q {4 I} Y {2 S}}


    }
}

::fileutil::magic::rt::Init
# ### ### ### ######### ######### #########
## Ready for use.

package provide fileutil::magic::rt 1.0
# EOF







|
|
|
>
|
|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
|

















>
>
>

|
>
>

|
>
>







|

756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
    variable typemap
    global tcl_platform

    # Set the definitions for all types which have their endianess
    # explicitly specified n their name.

    array set typemap {
	byte    {1 c}
	beshort {2 S}
	leshort {2 s}
	bedouble {8 Q}
	belong  {4 I}
	lelong  {4 i}
	bedate  {4 S}  ledate   {4 s}
	beldate {4 I}  leldate  {4 i}
	bedouble {8 Q}
	beqdate {8 W}
	beqldate {8 W}
	bequad {8 W} 
	ledouble {8 q}
	leqdate {8 w}
	leqldate {8 w}
	lequad {8 w}
	lequad {8 w} 
	leqwdate {8 w}
	medate  {4 me}
	melong  {4 me}
	meldate  {4 me}
	lestring16 {2 s}
	bestring16 {2 S}

	long  {4 Q} date  {4 Q} ldate {4 Q}
	short {2 Y} quad {8 W} 
    }

    # Now set the definitions for the types without explicit
    # endianess. They assume/use 'native' byteorder. We also put in
    # special forms for the compiler, so that it can use short names
    # for the native-endian types as well.

    # generate short form names
    foreach {n v} [array get typemap] {
	foreach {len scan} $v break
	#puts stderr "Adding $scan - [list $len $scan]"
	set typemap($scan) [list $len $scan]
    }

    # The special Q and Y short forms are incorrect, correct now to
    # use the proper native endianess.

    # {to do} {Is ldate done correctly in the procedure?  What is its byte
    # order anyway?  Native?}

    if {$tcl_platform(byteOrder) eq "littleEndian"} {
	array set typemap {Q {4 i} Y {2 s}
	    short {2 s} long {4 i} quad {8 w}
	}
    } else {
	array set typemap {Q {4 I} Y {2 S}
	    short {2 S} long {4 I} quad {8 W}
	}
    }
}

::fileutil::magic::rt::Init
# ### ### ### ######### ######### #########
## Ready for use.

package provide fileutil::magic::rt 1.2
# EOF

Changes to modules/fumagic/tmc.

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#
#	Same as (1), but does not write to stdout. Instead the part of
#	the 'tclfile' delineated by marker lines containing "BEGIN
#	GENERATED CODE" and "END GENERATED CODE" is replaced with the
#	generated code.

package require Tcl 8.4
lappend auto_path [file dirname [file normalize [info script]]] ; # This directory
lappend auto_path [file dirname [lindex $auto_path end]]        ; # and the one above
#puts *\t[join $auto_path \n*\t]
package require fileutil::magic::cfront

# ### ### ### ######### ######### #########
## Internal data and status

namespace eval ::tmc {







|
|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#
#	Same as (1), but does not write to stdout. Instead the part of
#	the 'tclfile' delineated by marker lines containing "BEGIN
#	GENERATED CODE" and "END GENERATED CODE" is replaced with the
#	generated code.

package require Tcl 8.4
set auto_path [linsert $auto_path 0 [file dirname [file normalize [info script]]]] ; # This directory
set auto_path [linsert $auto_path 0 [file dirname [lindex $auto_path end]]]]        ; # and the one above
#puts *\t[join $auto_path \n*\t]
package require fileutil::magic::cfront

# ### ### ### ######### ######### #########
## Internal data and status

namespace eval ::tmc {