Tcl Library Source Code

Check-in [c48961ab8e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Numerous improvements and bug fixes. Updated analyzer script from file-5.34 magic files.
Timelines: family | ancestors | descendants | both | pooryorick
Files: files | file ages | folders
SHA3-256: c48961ab8e97217bfca48720e6e3946292d952d85e6cac9eebc6decf002b8a85
User & Date: pooryorick 2018-08-21 22:48:24
References
2018-08-22
06:33 Closed ticket [ff8428ae15]: fileutil::magic::mimetype does not recognize HTML5 files plus 6 other changes artifact: 8514dbdadb user: pooryorick
06:28 Closed ticket [4ff12e3abd]: fileutil::magic fails to identify mimetype plus 6 other changes artifact: 05cef5483c user: pooryorick
Context
2021-04-30
21:46
Numerous improvements and bug fixes. Updated analyzer script from file-5.34 magic files. check-in: 179fe8c361 user: pooryorick tags: trunk
2018-08-26
10:53
remove duplicated code in ::mime::buildmessageaux check-in: 0587b55b96 user: yorick tags: pooryorick
2018-08-21
22:48
Numerous improvements and bug fixes. Updated analyzer script from file-5.34 magic files. check-in: c48961ab8e user: pooryorick tags: pooryorick
2018-08-15
05:23
code cleanup check-in: 6b167d6d17 user: pooryorick tags: pooryorick
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to modules/fumagic/cfront.man.

27
28
29
30
31
32
33
34
35
36


37
38
39
40
41
42
43

[section COMMANDS]

[list_begin definitions]

[call [cmd ::fileutil::magic::cfront::compile] [arg path]...]

This command takes the paths of one or more files and directories and
compiles all the files, and the files in all the directories into a
single recognizer for all the file types specified in these files.



[para]

All the files have to be in the format specified by magic(5).

[para]








|
|
|
>
>







27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

[section COMMANDS]

[list_begin definitions]

[call [cmd ::fileutil::magic::cfront::compile] [arg path]...]

This command takes the paths of one or more files and directories and compiles
all the files, and the files in all the directories into a single analyzer for
all the file types specified in these files.  It returns a list whose first
item is a list per-file dictionaries of analyzer scripts and whose second item
is a list of analyzer commands.

[para]

All the files have to be in the format specified by magic(5).

[para]

Changes to modules/fumagic/cfront.tcl.

47
48
49
50
51
52
53


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

69
70



71
72
73
74
75
76
77

78
79
80

81
82
83
84
85
86
87
88
89
90
91

92
93
94
95
96
97
98
99

100
101
102
103
104
105
106
    variable debug 0

    # Make backend functionality accessible
    namespace import ::fileutil::magic::cgen

    namespace export compile generate install



    variable floattestops {= < > !}
    variable inttestops {= < > & ^ ~ !}
    variable stringtestops { > < = !}
    variable offsetopts {& | ^ + - * / %}
    variable stringmodifiers {W w c C t b T}
    variable typemodifiers [dict create \
	indirect r \
	search $stringmodifiers \
	string $stringmodifiers \
	pstring [list {*}$stringmodifiers B H h L l J] \
	regex {c s l} \
    ]
    set numeric_modifier_allowed {regex search}
	
    variable types_numeric_short { 

	dC byte d1 byte C byte 1 byte ds short d2 short S short 2 short dI long
	dL long d4 long I long L long 4 long  d8 quad 8 quad dQ quad Q quad



    }

    variable types_numeric_re [join [list {*}[
	array names ::fileutil::magic::rt::typemap] {*}[
	dict keys $types_numeric_short]] |]

    variable types_string_short [dict create s string] 


    variable types_string {
	bestring clear default indirect lestring pstring regex search string

    }
    variable types_string_re [join [list {*}[
	dict keys $types_string_short] {*}$types_string] |]

    variable types_verbatim {name use}

    variable types_notimplemented {}
    variable types_notimplemented_re [join $types_notimplemented |]

    variable types_numeric_real {
	float double befloat bedouble lefloat ledouble

    }

    variable indir_typemap [dict create \
	b byte c byte e ledouble f ledouble g ledouble i leid3 h leshort \
	s leshort l lelong B byte C byte E bedouble F bedouble G bedouble \
	H beshort I beid3 L belong m ME S beshort]

}


proc ::fileutil::magic::cfront::advance {len args} {
    upvar node node tree tree
    if {[llength args]} {
	upvar [lindex $args 0] res
    }
    set res {}







>
>













|
|
>

|
>
>
>


|
<
|


>


|
>

|
|



|
<

|
|
>








>







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
    variable debug 0

    # Make backend functionality accessible
    namespace import ::fileutil::magic::cgen

    namespace export compile generate install

    namespace upvar ::fileutil::magic::rt typemap typemap

    variable floattestops {= < > !}
    variable inttestops {= < > & ^ ~ !}
    variable stringtestops { > < = !}
    variable offsetopts {& | ^ + - * / %}
    variable stringmodifiers {W w c C t b T}
    variable typemodifiers [dict create \
	indirect r \
	search $stringmodifiers \
	string $stringmodifiers \
	pstring [list {*}$stringmodifiers B H h L l J] \
	regex {c s l} \
    ]
    set numeric_modifier_allowed {regex search}

    variable types_numeric_short
    foreach {shortname name} {
	dC byte d1 byte C byte 1 byte ds short d2 short S short 2 short dI long
	dL long d4 long I long L long 4 long d8 quad 8 quad dQ quad Q quad
    } {
	dict set types_numeric_short $shortname $name
	dict set types_numeric_short u$shortname u$name
    }

    variable types_numeric_all [list {*}[

	array names typemap] {*}[dict keys $types_numeric_short]]

    variable types_string_short [dict create s string] 
    variable types_string_short [dict create us ustring] 

    variable types_string {
	bestring clear indirect lestring lestring16 pstring regex search
	string ustring
    }
    variable types_string_all [list {*}[
	dict keys $types_string_short] {*}$types_string]

    variable types_verbatim {name use}

    variable types_notimplemented {der}


    variable types_numeric_real
    foreach name {float double befloat bedouble lefloat ledouble} {
	lappend types_numeric_real $name u$name
    }

    variable indir_typemap [dict create \
	b byte c byte e ledouble f ledouble g ledouble i leid3 h leshort \
	s leshort l lelong B byte C byte E bedouble F bedouble G bedouble \
	H beshort I beid3 L belong m ME S beshort]

}


proc ::fileutil::magic::cfront::advance {len args} {
    upvar node node tree tree
    if {[llength args]} {
	upvar [lindex $args 0] res
    }
    set res {}
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
	}
    }
    set line [$tree get $node line]
    $tree set $node cursor $cursor
    return $res
}

proc ::fileutil::magic::cfront::rewind len {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    incr cursor -$len
    $tree set $node cursor $cursor
}

proc ::fileutil::magic::cfront::parseerror args {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    set line [$tree get $node line]
    set files [$tree get root files]
    set file [lindex files [$tree get $node file]]
    return -code error -errorcode [list fumagic {parse error}] [
	list [lmap arg $args {string trim $arg}] \
	file $file \
	linenenum [$tree get $node linenum] \
	cursor $cursor \
	line [list \
	    [string range $line 0 ${cursor}-1] \
	    [string range $line $cursor end]]]
}

proc ::fileutil::magic::cfront::parsewarning args {
    upvar node node tree tree
    catch {parseerror {*}$args} res options
    puts stderr [dict get $options -errorinfo]
}


# parse an individual line
variable ::fileutil::magic::cfront::parsedkeys {
}
proc ::fileutil::magic::cfront::parseline {tree node} {
    variable parsedkeys
    set line [$tree get $node line]
    $tree set $node cursor 0 
    parseoffset $tree $node
    parsetype $tree $node
    parsetest $tree $node
    parsemsg $tree $node

    set record [$tree getall $node]
    foreach key $parsedkeys {
	if {![dict exists $record $key]} {
	    return -code error [list {missing key} $key]
	}
    }
    ::fileutil::magic::cfront::Debug {
   	puts [list parsed $record]
    }
}

proc ::fileutil::magic::cfront::parsefloat {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[scan $line2 %e%n num count] < 0} {
	parseerror {invalid floating point number}
    }
    set cursor [expr {$cursor + $count}]
    $tree set $node cursor $cursor

    # These suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([fFlL)} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}

proc ::fileutil::magic::cfront::parseint {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[set scanres [scan $line2 %lli%n num n]] < 1} {
	parseerror {invalid number}
    }
    set cursor [expr {$cursor + $n}]
    $tree set $node cursor $cursor
    # Thse suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([uU]?[lL]{1,2})} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}


proc ::fileutil::magic::cfront::parsetype {tree node} {
    variable types_numeric_re
    variable types_numeric_short
    variable types_string_re
    variable types_string_short
    variable types_notimplemented_re
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    $tree set $node mod {}
    $tree set $node mand {}
    set num_or_string {
    }
    if {[regexp -start $cursor {\A\s*(\w+)} $line match type]} {
	advance [string length $match]
	switch -regexp -matchvar match $type \
	    ^(u?)($types_numeric_re)$ - ^(u?)($types_string_re)$ {

	    lassign $match -> sgn type
	    # {to do} {Current design doesn't use sign, right?  Is it
	    # really not needed?}
	    $tree set $node sgn [dict get {{} 1 u 0} $sgn]

	    if {[regexp ^($types_numeric_re)$ $type]} {
		if {[dict exists $types_numeric_short $type]} {
		    set type [dict get $types_numeric_short $type]
		}
		$tree set $node type $type
		parsetypenummod $tree $node
	    } else {
		if {[dict exists $types_string_short $type]} {
		    set type [dict get $types_string_short $type]
		}
		$tree set $node type $type
		# No modifying operator for strings
		parsetypemod $tree $node

		if {$type eq {search} && [$tree get $node mand] eq {}} {
		    parseerror {search has no number}
		}
	    }


	} \
	^(name|use)$ {
	    $tree set $node type [lindex $match end] 
	} \
	$types_notimplemented_re {
	    parseerror {type not implemented}
	} \
	default {
	    parseerror {unknown type}
	}
    } else {
	parseerror {no type}
    }
}

proc ::fileutil::magic::cfront::parsetypemod {tree node} {
    # For numeric types , $mod is a list of modifiers and $mand is either a
    # number or the empty strinng .
    variable typemodifiers
    variable numeric_modifier_allowed
    set type [$tree get $node type]
    if {[advance 1 char] ne {/}} {
	rewind 1
	return
    }
    set res [dict create] 
    while 1 {
	if {[advance 1 char] eq {/}} {
	    continue
	}
	if {[string is space $char]} {
	    break
	}
	if {[dict exists $typemodifiers $type] && $char in [dict get $typemodifiers $type]} {
	    dict set res $char {}
	} elseif {$type in $numeric_modifier_allowed} {
	    rewind 1
	    if {[catch {parseint $tree $node} mand]} {
		# Whatever it is, it isn't a number.  Let the next parsing step
		# handle it .
		break
	    } else {
		$tree set $node mand $mand  ; # numeric modifier
	    }
	} else {
	    parseerror {bad modifier}
	}
    }
    $tree set $node mod [dict keys $res]
}

proc ::fileutil::magic::cfront::parsetypenummod {tree node} {
    # For numeric types, $mod is an operator and $mand is a number
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    if {[regexp -start $cursor {\A([-&|^+*/%])} \
	$line match mod]} {
	advance [string length $match]
	$tree set $node mod $mod
	# {to do} {parse floats?}
	$tree set $node mand [parseint $tree $node] ; # mod operand
    } else {
	$tree set $node mod {}
	$tree set $node mand {} 
    }
}


proc ::fileutil::magic::cfront::parsestringval {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    advance w1 char 
    set val {}
    set line [$tree get $node line]
    while 1 {
	# break on whitespace or empty string
	if {[string is space $char] || $char eq {}} break
	switch $char [dict create  \
	    \\ {
		advance 1 char
		if {[string is space $char]} {
		    append val \\$char
		} else {
		    # extra backslashes because of interaction with glob
		    switch -glob $char [dict create \
			{\\} {
			    append val {\\}
			} \t {
			    parsewarning {use \t instead of \<tab>}
			    append val \\t
			} > - < - & - ^ - = - ! - ( - ) - . {
			    if {$char in [list {*}$stringtestops \
				{*}$floattestops {*}$inttestops]} {
				parsewarning {no need to escape operators}
			    }
			    append val $char 
			} a - b - f - n - r - t - v {
			    append val \\$char
			} x {
			    set cursor [$tree get $node cursor]
			    if {[regexp -start $cursor \
				{\A([0-9A-Fa-f]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val \\x$char2
			    } else {
				parseerror {malformed \x escape sequence}
			    }
			} \[0-7\] {
			    set cursor [$tree get $node cursor]
			    append val \\$char
			    if {[regexp -start $cursor \
				{\A([0-7]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val $char2
			    }
			} default {
			    parseerror {Could not handle escape sequence in value}
			}
		    ]
		}
	    } default {
		if {[string is space $char] || $char in [
		    list \# \{ \} \[  \] \" \$ \; \n]} {
		    append val \\
		}
		append val $char
	    }
	]
	advance 1 char
    }
    $tree set $node val $val
}

proc ::fileutil::magic::cfront::parsetestverbatim {tree node} {
    switch [$tree get $node type] {
	name {
	    $tree set $node rel 1
	}
	use {
	    set cursor [$tree get $node cursor]
	    # order matters in regular expression : longest match must come
	    # first in parenthesized
	    if {[regexp -start $cursor {\A\s*(?:\\\^|\^)} [$tree get $node line] match]} {
		advance [string length $match]
		$tree set $node iendian 1
	    } else {
		$tree set $node iendian 0
	    }
	}

    }
    parsestringval $tree $node
}

proc ::fileutil::magic::cfront::parsetest {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    variable types_numeric_real
    variable types_numeric_short
    variable types_string
    variable types_verbatim
    set type [$tree get $node type]
    if {$type in $types_verbatim} {
	parsetestverbatim $tree $node
	return
    }
    $tree set $node compinvert 0
    set testinvert 0
    set comp ==
    advance w1 char
    if {$char eq {x}} {
	advance 1 char
	if {[string is space $char]} {
	    $tree set $node testinvert 0
	    $tree set $node comp x
	    $tree set $node val {}
	    return
	} else {
	    rewind 1
	}
    }

    if {$type in $types_string} {
	while 1  {
	    if {$char in $stringtestops} {
		if {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	parsestringval $tree $node
    } elseif {$type in [list {*}[
	array names ::fileutil::magic::rt::typemap] {*}[
	dict keys $types_numeric_short]]} {
	if {$type in $types_numeric_real} {
	    set ops $floattestops
	    set parsecmd parsefloat
	} else {
	    set ops $inttestops 
	    set parsecmd parseint
	}

	while 1 {
	    if {$char in $ops} {
		if {$char eq {~}} {
		    $tree set $node compinvert 1 
		} elseif {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation and tilde must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	$tree set $node val [$parsecmd $tree $node]
    } else {
	parseerror {don't know how to parse the test or this type}
    }
    switch $comp {
	= {
	    set comp ==
	}
    }
    # This facilitates Switch creation by [treegen1]
    if {$testinvert && ($comp eq {==})} {
	set comp !=
	set testinvert 0
    }
    $tree set $node testinvert $testinvert
    $tree set $node comp $comp 
}

proc ::fileutil::magic::cfront::parseoffset {tree node} {

    # Offset parser.
    # Syntax:
    #   ( ?&? number ?.[bslBSL]? ?[+-]? ?number? )

    # This was all fine and dandy, but didn't do spaces where spaces might
    # exist between lexical elements in the wild, and ididn't do offset
    # operators

    #set n {([-+]?[0-9]+|[-+]?0x[0-9A-Fa-f]+)[UL]*}

    ##"\\((&?)(${n})((\\.\[bslBSL])?)()(\[+-]?)(${n}?)\\)"
    #set o \
    #    "^(&?)${n}((?:\\.\[bslBSL])?)(?:(\[-+*/%&|^])${n})?(?:(\[+-])(\\()?${n}\\)?)?$"
    ##     |   |   |                     |            |        |      |    |
    ##     1   2   3                     4            5        6      7    8 
    ##                            1    2    3     4   5        6    7     8   
    #set ok [regexp $o $offset -> irel base type  iop ioperand sign ind idx]


    variable offsetopts
    variable indir_typemap
    $tree set $node rel 0 ;   # relative
    $tree set $node ind 0 ;   # indirect
    $tree set $node ir 0 ;    # indirect relative
    $tree set $node it {} ;   # indir_type
    $tree set $node ioi 0 ;   # indirect offset invert
    $tree set $node iir 0 ;   # indirect indirect relative 
    $tree set $node ioo + ;   # indirect_offset_op
    $tree set $node io 0 ;    # indirect offset
    advance w1 char
    if {$char eq {&}} {
	advance w1 char
	$tree set $node rel 1
    }

    if {$char eq {(}} {
	$tree set $node ind 1

	if {[advance w1] eq {&}} {
	    $tree set $node ir 1
	} else {
	    rewind 1
	}
	$tree set $node o [parseint $tree $node]

	# $char is used below if it's not "."
	if {[advance w1 char] eq {.}} {
	    advance w1 it
	    if {[dict exists $indir_typemap $it]} {
		set it [
		    dict get $indir_typemap $it]
	    } else {
		parseerror {bad indirect offset type}
	    }
	    advance w1 char
	} else {
	    set it long
	}
	$tree set $node it $it


	# The C implementation does this, so we will , too .
	if {$char eq {~}} {
	    advance w1 char
	    $tree set $node ioi 1
	}

	if {$char in $offsetopts} {
	    $tree set $node ioo $char
	    if {[advance w1] in {(}} {
		$tree set $node iir 1
	    } else {
		rewind 1
	    }
	    $tree set $node io [parseint $tree $node]
	    if {[$tree get $node iir]} {
		if {[advance w1] ne {)}} {
		    parseerror {
			expected closing parenthesis for indirect indirect offset offset
		    }
		}
	    }
	    advance w1 char
	}

	if {$char ne {)}} {
	    parseerror {
		expected close parenthesis for indirect offset 
	    }
	}
    } else {
	rewind 1
	$tree set $node o [parseint $tree $node]
    }
}

proc ::fileutil::magic::cfront::parseoffsetmod {tree node} {
    advance w1 char
    if {$char eq {~}} {
	$tree set $node offset_invert 1
	advance w1 char
    } else {
	$tree set $node offset_invert 0
    }
    switch $char {
	+ - - - * - / - % - & - | - ^ {
	    $tree set $node offset_mod_op $char
	    $tree set $node offset_mod [parseint $tree $node]
	}
	default {
	    $tree set $node offset_mod_op {}
	    $tree set $node offset_mod {}
	    rewind 1
	    # no offset modifier
	}
    }
}

proc ::fileutil::magic::cfront::parsemsg {tree node} {
    advance w
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    ##leave \b in the message for [emit] to parse
    #regexp -start $cursor {\A(\b|\\b)?(.*)$} $line match b line
    #if {$b ne {}} {
    #    $tree set $node space 0
    #} else {
    #    $tree set $node space 1
    #}
    set line [string range $line $cursor end]
    $tree set $node desc $line
}

# process a magic file
proc ::fileutil::magic::cfront::process {tree file {maxlevel 10000}} {
    variable level	;# level of line
    variable linenum	;# line number

    set level  0

    set linenum 0
    set records {}
    set rejected 0
    set script {}
    if {[$tree keyexists root files]} {
	set files [$tree get root files]
    } else {
	set files {}
    }
    set fileidx [llength $files] 
    if {$file in $files} {
	return -code error [list {already processed file} $file]
    }
    lappend files $file
    $tree set root files $files
    $tree set root level -1
    set node root
    ::fileutil::foreachLine line $file {
   	incr linenum
	# Only trim the left side . White space on the the right side could be
	# part of an escape sequence , and trimming would munge it .
   	set line [string trimleft $line]
   	if {[string index $line 0] eq {#}} {
   	    continue	;# skip comments
   	} elseif {$line eq {}} {
   	    continue	;# skip blank lines
   	} else {
   	    # parse line
	    if {[regexp {!:(\S+)\s*([^\s]*).*$} $line -> extname extdata]} {
		if {$rejected} {
		    continue
		}
		if {$node eq {root}} {
		    return -code error [list {malformed magic file}]
		}
		$tree set $node ext_$extname $extdata
	    } else {
		# calculate the line's level
		set unlevel [string trimleft $line >]
		set level   [expr {[string length $line] - [string length $unlevel]}]
		set line $unlevel
		if {$level > $maxlevel} {
		    return -code continue "Skip - too high a level"
		}
		if {$level > 0} {
		    if {$rejected} {
			continue
		    }
		    while {[$tree keyexists $node level] && [$tree get $node level] >= $level} {
			set node [$tree parent $node]
		    }
		    if {$level > [$tree get $node level]+1} {
			return -code error [
			    list {level more than one greater than parent level} \
				file $file linenum $linenum line $line]
		    }
		    set node [$tree insert $node end]
		} else {
		    set rejected 0
		    set node [$tree insert root end]
		    set node0 $node
		}
		$tree set $node file $fileidx
		$tree set $node line $line
		$tree set $node linenum $linenum
		$tree set $node level $level
		if {[catch {parseline $tree $node} cres copts]} {
		    set errorcode [dict get $copts -errorcode]
		    if {[lindex $errorcode 0] eq {fumagic} && [
			lindex $errorcode 1] eq {parse error}} {
			$tree delete $node0
			set rejected 1
			puts stderr [list Rejected {bad parse}]
			puts stderr [dict get $copts -errorinfo]
			continue	;# skip erroring lines
		    } else {
			return -options $copts $cres
		    }

		}
	    }
   	}

   	# collect some summaries
   	::fileutil::magic::cfront::Debug {
   	    variable types
   	    set types($type) [$tree get $node type]
   	    variable quals
   	    set quals($qual) [$tree get $node qual]
   	}

   	#puts $linenum level:$level offset:$offset type:$type
	#puts qual:$qual compare:$compare val:'$val' desc:'$desc'

    }
}


# compile up magic files or directories of magic files into a single recognizer.
proc ::fileutil::magic::cfront::compile {args} {
    set tree [tree]

    foreach arg $args {
   	if {[file type $arg] eq  {directory}} {







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







126
127
128
129
130
131
132









































































































































































































































































































































































































































































































































































































































133
134
135
136
137
138
139
	}
    }
    set line [$tree get $node line]
    $tree set $node cursor $cursor
    return $res
}











































































































































































































































































































































































































































































































































































































































# compile up magic files or directories of magic files into a single recognizer.
proc ::fileutil::magic::cfront::compile {args} {
    set tree [tree]

    foreach arg $args {
   	if {[file type $arg] eq  {directory}} {
781
782
783
784
785
786
787

788
789

790
791





792









793
794
795


796
797






798
799

800




801
802
803

804
805
806
807
808
809
810
811

































































































































































































































































































































































































































































































































































































































































812
813
814
815
816
817
818
819
820
821
822

823
824
825
826
827
828
829

    ::fileutil::magic::cfront::Debug {puts [treedump $t]}
    #set tcl [run $script]

    return [list $named $tests]
}


proc ::fileutil::magic::cfront::generate {namespace args} {


    set pspace [namespace qualifiers $namespace]






    if {$pspace eq ""} {









	return -code error "Cannot generate recognizer in the global namespace"
    }



    lassign [compile {*}$args] named tests







    set script "namespace eval [list $namespace] {
	variable named [list $named]

	variable tests [list $tests]




    }"
    return $script 
}


proc ::fileutil::magic::cfront::install args {
    foreach arg $args {
	set path [file tail $arg]
	eval [generate ::fileutil::magic::/$path $arg]
    }
    return
}


































































































































































































































































































































































































































































































































































































































































proc ::fileutil::magic::cfront::tree {} {
    set tree [::struct::tree]

    $tree set root path ""
    $tree set root otype Root
    $tree set root type root
    $tree set root named {}
    $tree set root message "unknown"
    return $tree
}


# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::cfront::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,







>
|

>
|

>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
|
|
>
>


>
>
>
>
>
>
|
|
>
|
>
>
>
>
|


>




|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











>







172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892

    ::fileutil::magic::cfront::Debug {puts [treedump $t]}
    #set tcl [run $script]

    return [list $named $tests]
}


proc ::fileutil::magic::cfront::generate args {

    set indent {}
    set pline {}

    while {[llength $args]} {
	set args [lassign $args[set args {}] key]
	switch $key {
	    compressed {
		set args [lassign $args[set args {}] val]
		if {$val} {
		    set indent {}
		    set pline {}
		} else {
		    set indent \t
		    set pline \n
		}
	    }
	    -- break
	    default {
		error [list {unknown argument}]
	    }
	}
    }

    lassign [compile {*}$args] named tests

    append script "variable named {\n"
	dict for {key val} $named {
	    append script "${indent}[list $key]"
		append script "$pline${indent}${indent}[list [string map [
		    list \n \n${indent}] $val]]\n"
	    }
    append script "$pline}\n"

    append script "proc analyze {} {\n"
	    foreach item $tests {
		append script "${indent}[string map [
		    list \n \n${indent}] $item]\n"
	    }
    append script "$pline}\n"

    return $script 
}


proc ::fileutil::magic::cfront::install args {
    foreach arg $args {
	set path [file tail $arg]
	eval [generate compressed 1 -- ::fileutil::magic::/$path $arg]
    }
    return
}


proc ::fileutil::magic::cfront::parseerror args {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    set line [$tree get $node line]
    set files [$tree get root files]
    set file [lindex $files [$tree get $node file]]
    return -code error -errorcode [list fumagic {parse error}] [
	list [lmap arg $args {string trim $arg}] \
	file $file \
	linenenum [$tree get $node linenum] \
	cursor $cursor \
	line [list \
	    [string range $line 0 ${cursor}-1] \
	    [string range $line $cursor end]]]
}


proc ::fileutil::magic::cfront::parsewarning args {
    upvar node node tree tree
    catch {parseerror {*}$args} res options
    puts stderr [list parse warning $res]
    #puts stderr [dict get $options -errorinfo]
}


# parse an individual line
variable ::fileutil::magic::cfront::parsedkeys {
}
proc ::fileutil::magic::cfront::parseline {tree node} {
    variable parsedkeys
    set line [$tree get $node line]
    $tree set $node cursor 0 
    parseoffset $tree $node
    parsetype $tree $node
    parsetest $tree $node
    parsemsg $tree $node

    set record [$tree getall $node]
    foreach key $parsedkeys {
	if {![dict exists $record $key]} {
	    return -code error [list {missing key} $key]
	}
    }
    ::fileutil::magic::cfront::Debug {
   	puts [list parsed $record]
    }
}


proc ::fileutil::magic::cfront::parsefloat {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[scan $line2 %e%n num count] < 0} {
	parseerror {invalid floating point number}
    }
    set cursor [expr {$cursor + $count}]
    $tree set $node cursor $cursor

    # These suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([fFlL)} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}


proc ::fileutil::magic::cfront::parseint {tree node} {
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    # If only [scan] had a @ conversion character like [binary scan]
    set line2 [string range $line $cursor end]
    if {[set scanres [scan $line2 %lli%n num n]] < 1} {
	parseerror [list {invalid number} $line2]
    }
    set cursor [expr {$cursor + $n}]
    $tree set $node cursor $cursor
    # Thse suffixes are not used in magic files
    #if {[regexp -start $cursor {\A([uU]?[lL]{1,2})} -> modifier]} {
    #    advance [string length $modifier]]
    #}
    return $num
}


proc ::fileutil::magic::cfront::parsetype {tree node} {
    variable types_numeric_all
    variable types_numeric_short
    variable types_string_all
    variable types_string_short
    variable types_notimplemented
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    $tree set $node mod {}
    $tree set $node mand {}
    set num_or_string {
    }
    if {[regexp -start $cursor {\A\s*(\w+)} $line match type]} {
	advance [string length $match]
	if {$type in $types_numeric_all} {
	    if {[dict exists $types_numeric_short $type]} {
		set type [dict get $types_numeric_short $type]
	    }
	    $tree set $node type $type
	    parsetypenummod $tree $node
	} elseif {$type in $types_string_all} {
	    if {[dict exists $types_string_short $type]} {
		set type [dict get $types_string_short $type]
	    }
	    $tree set $node type $type
	    # No modifying operator for strings
	    parsetypemod $tree $node

	    if {$type eq {search} && [$tree get $node mand] eq {}} {
		parsewarning {search has no number}
		# set the same default that file(1) sets
		$tree set $node mand 100
	    }
	} elseif {$type in {default name use}} {
	    $tree set $node type $type
	} elseif {$type in $types_notimplemented} {
	    parseerror {type not implemented}
	} else {
	    parseerror {unknown type}
	}
    } else {
	parseerror {no type}
    }
}


proc ::fileutil::magic::cfront::parsetypemod {tree node} {
    # For numeric types , $mod is a list of modifiers and $mand is either a
    # number or the empty string .
    variable typemodifiers
    variable numeric_modifier_allowed
    set type [$tree get $node type]
    if {[advance 1 char] ne {/}} {
	rewind 1
	return
    }
    set res [dict create] 
    while 1 {
	if {[advance 1 char] eq {/}} {
	    continue
	}
	if {[string is space $char]} {
	    break
	}
	if {[dict exists $typemodifiers $type] && $char in [dict get $typemodifiers $type]} {
	    dict set res $char {}
	} elseif {$type in $numeric_modifier_allowed} {
	    rewind 1
	    if {[catch {parseint $tree $node} mand]} {
		# Whatever it is, it isn't a number.  Let the next parsing step
		# handle it .
		break
	    } else {
		$tree set $node mand $mand  ; # numeric modifier
	    }
	} else {
	    parseerror {bad modifier}
	}
    }
    $tree set $node mod [dict keys $res]
}


proc ::fileutil::magic::cfront::parsetypenummod {tree node} {
    # For numeric types, $mod is an operator and $mand is a number
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]
    if {[regexp -start $cursor {\A([-&|^+*/%=])} $line match mod]} {
	advance [string length $match]
	$tree set $node mod $mod
	# {to do} {parse floats?}
	$tree set $node mand [parseint $tree $node] ; # mod operand
    } else {
	$tree set $node mod {}
	$tree set $node mand {} 
    }
}


proc ::fileutil::magic::cfront::parsestringval {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    advance w1 char 
    set val {}
    set nodetype [$tree get $node type]
    set line [$tree get $node line]
    while 1 {
	# break on whitespace or empty string
	if {[string is space $char] || $char eq {}} break
	switch $char [dict create  \
	    \\ {
		advance 1 char
		if {[string is space $char]} {
		    append val \\$char
		} else {
		    # extra backslashes because of interaction with glob
		    switch -glob -- $char {
			\\\\ {
			    append val {\\}
			} \t {
			    parsewarning {use \t instead of \<tab>}
			    append val \\t
			} > - < - & - ^ - = - ! - ( - ) - . {
			    if {$char in [list {*}$stringtestops \
				{*}$floattestops {*}$inttestops]} {
				parsewarning {no need to escape operators}
			    }
			    append val $char 
			} a - b - f - n - r - t - v {
			    append val \\$char
			} x {
			    set cursor [$tree get $node cursor]
			    if {[regexp -start $cursor \
				{\A([0-9A-Fa-f]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val \\x$char2
			    } else {
				parseerror {malformed \x escape sequence}
			    }
			} [0-7] {
			    set cursor [$tree get $node cursor]
			    append val \\$char
			    if {[regexp -start $cursor \
				{\A([0-7]{1,2})} $line match char2]} {
				advance [string length $match] 
				append val $char2
			    }
			} default {
			    if {$nodetype eq {regex}} {
				if {$char ni {[ ] ( ) . * ? ^ $ | \{ \}}} {
				    parsewarning [list {no need to escape}]
				}
			    } elseif {[string is print $char]} {
				if {$char ni {< > & ^ = !}} {
				    parsewarning [list {no need to escape}]
				}
			    }
			    append val [tclescape $char]
			}
		    }
		}
	    } default {
		append val [tclescape $char]
	    }
	]
	advance 1 char
    }
    $tree set $node val $val
}


proc ::fileutil::magic::cfront::parsetest {tree node} {
    variable floattestops
    variable inttestops
    variable stringtestops
    variable types_numeric_real
    variable types_numeric_all
    variable types_string
    variable types_verbatim
    set type [$tree get $node type]
    if {$type in $types_verbatim} {
	parsetestverbatim $tree $node
	return
    }
    $tree set $node compinvert 0
    set testinvert 0
    set comp ==
    advance w1 char
    if {$char eq {x}} {
	advance 1 char
	if {[string is space $char]} {
	    $tree set $node testinvert 0
	    $tree set $node comp x
	    $tree set $node val {}
	    return
	} else {
	    rewind 1
	}
    }

    if {$type in $types_string} {
	while 1  {
	    if {$char in $stringtestops} {
		if {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	parsestringval $tree $node
    } elseif {$type in $types_numeric_all} {
	if {$type in $types_numeric_real} {
	    set ops $floattestops
	    set parsecmd parsefloat
	} else {
	    set ops $inttestops 
	    set parsecmd parseint
	}

	while 1 {
	    if {$char in $ops} {
		if {$char eq {~}} {
		    $tree set $node compinvert 1 
		} elseif {$char eq {!}} {
		    set testinvert 1
		} else {
		    set comp $char
		    # Exclamation and tilde must precede any normal operator
		    break
		}
		advance w1 char
	    } else {
		rewind 1
		break
	    }
	}
	$tree set $node val [$parsecmd $tree $node]
    } else {
	parseerror {don't know how to parse the test or this type}
    }
    switch $comp {
	= {
	    set comp ==
	}
    }
    # This facilitates Switch creation by [treegen1]
    if {$testinvert && ($comp eq {==})} {
	set comp !=
	set testinvert 0
    }
    $tree set $node testinvert $testinvert
    $tree set $node comp $comp 
}


proc ::fileutil::magic::cfront::parsetestverbatim {tree node} {
    switch [$tree get $node type] {
	name {
	    $tree set $node rel 1
	}
	use {
	    set cursor [$tree get $node cursor]
	    # order matters in regular expression : longest match must come
	    # first in parenthesized
	    if {[regexp -start $cursor {\A\s*(?:\\\^|\^)} [$tree get $node line] match]} {
		advance [string length $match]
		$tree set $node iendian 1
	    } else {
		$tree set $node iendian 0
	    }
	}

    }
    parsestringval $tree $node
}


proc ::fileutil::magic::cfront::parseoffset {tree node} {

    # Offset parser.
    # Syntax:
    #   ( ?&? number ?.[bslBSL]? ?[+-]? ?number? )

    # This was all fine and dandy, but didn't do spaces where spaces might
    # exist between lexical elements in the wild, and ididn't do offset
    # operators

    #set n {([-+]?[0-9]+|[-+]?0x[0-9A-Fa-f]+)[UL]*}

    ##"\\((&?)(${n})((\\.\[bslBSL])?)()(\[+-]?)(${n}?)\\)"
    #set o \
    #    "^(&?)${n}((?:\\.\[bslBSL])?)(?:(\[-+*/%&|^])${n})?(?:(\[+-])(\\()?${n}\\)?)?$"
    ##     |   |   |                     |            |        |      |    |
    ##     1   2   3                     4            5        6      7    8 
    ##                            1    2    3     4   5        6    7     8   
    #set ok [regexp $o $offset -> irel base type  iop ioperand sign ind idx]

    variable offsetopts
    variable indir_typemap
    $tree set $node rel 0 ;   # relative
    $tree set $node ind 0 ;   # indirect
    $tree set $node ir 0 ;    # indirect relative
    $tree set $node it {} ;   # indir_type
    $tree set $node ioi 0 ;   # indirect offset invert
    $tree set $node iir 0 ;   # indirect indirect relative 
    $tree set $node ioo + ;   # indirect_offset_op
    $tree set $node io 0 ;    # indirect offset
    advance w1 char
    if {$char eq {&}} {
	advance w1 char
	$tree set $node rel 1
    }

    if {$char eq {(}} {
	$tree set $node ind 1

	if {[advance w1] eq {&}} {
	    $tree set $node ir 1
	} else {
	    rewind 1
	}
	$tree set $node o [parseint $tree $node]

	# $char is used below if it's not "."
	if {[advance w1 char] in {. ,}} {
	    advance w1 it
	    if {[dict exists $indir_typemap $it]} {
		set it [
		    dict get $indir_typemap $it]
		    if {$char eq {.}} {
			set it u$it
		    } 
	    } else {
		parseerror {bad indirect offset type}
	    }
	    advance w1 char
	} else {
	    set it long
	}
	$tree set $node it $it


	# The C implementation does this, so we will , too .
	if {$char eq {~}} {
	    advance w1 char
	    $tree set $node ioi 1
	}

	if {$char in $offsetopts} {
	    $tree set $node ioo $char
	    if {[advance w1] in {(}} {
		$tree set $node iir 1
	    } else {
		rewind 1
	    }
	    $tree set $node io [parseint $tree $node]
	    if {[$tree get $node iir]} {
		if {[advance w1] ne {)}} {
		    parseerror {
			expected closing parenthesis for indirect indirect offset offset
		    }
		}
	    }
	    advance w1 char
	}

	if {$char ne {)}} {
	    parseerror {
		expected close parenthesis for indirect offset 
	    }
	}
    } else {
	rewind 1
	$tree set $node o [parseint $tree $node]
    }
}


proc ::fileutil::magic::cfront::parseoffsetmod {tree node} {
    advance w1 char
    if {$char eq {~}} {
	$tree set $node offset_invert 1
	advance w1 char
    } else {
	$tree set $node offset_invert 0
    }
    switch $char {
	+ - - - * - / - % - & - | - ^ {
	    $tree set $node offset_mod_op $char
	    $tree set $node offset_mod [parseint $tree $node]
	}
	default {
	    $tree set $node offset_mod_op {}
	    $tree set $node offset_mod {}
	    rewind 1
	    # no offset modifier
	}
    }
}


proc ::fileutil::magic::cfront::parsemsg {tree node} {
    advance w
    set line [$tree get $node line]
    set cursor [$tree get $node cursor]

    ##leave \b in the message for [emit] to parse
    #regexp -start $cursor {\A(\b|\\b)?(.*)$} $line match b line
    #if {$b ne {}} {
    #    $tree set $node space 0
    #} else {
    #    $tree set $node space 1
    #}

    set line [string range $line $cursor end]

    $tree set $node desc $line
}


# process a magic file
proc ::fileutil::magic::cfront::process {tree file {maxlevel 10000}} {
    variable level	;# level of line
    variable linenum	;# line number

    set level  0

    set linenum 0
    set records {}
    set rejected 0
    set script {}
    if {[$tree keyexists root files]} {
	set files [$tree get root files]
    } else {
	set files {}
    }
    set fileidx [llength $files] 
    if {$file in $files} {
	return -code error [list {already processed file} $file]
    }
    lappend files $file
    $tree set root files $files
    $tree set root level -1
    set node root
    ::fileutil::foreachLine line $file {
   	incr linenum
	# Only trim the left side . White space on the the right side could be
	# part of an escape sequence , and trimming would munge it .
   	set line [string trimleft $line]
   	if {[string index $line 0] eq {#}} {
   	    continue	;# skip comments
   	} elseif {$line eq {}} {
   	    continue	;# skip blank lines
   	} else {
   	    # parse line
	    if {[regexp {^\s*!:(\S+)\s*(.*?)\s*$} $line -> extname extdata]} {
		if {$rejected} {
		    continue
		}
		if {$node eq {root}} {
		    return -code error [list {malformed magic file}]
		}
		$tree set $node ext_$extname $extdata
	    } else {
		# calculate the line's level
		set unlevel [string trimleft $line >]
		set level   [expr {[string length $line] - [string length $unlevel]}]
		set line $unlevel
		if {$level > $maxlevel} {
		    return -code continue "Skip - too high a level"
		}
		if {$level > 0} {
		    if {$rejected} {
			continue
		    }
		    while {[$tree keyexists $node level] && [$tree get $node level] >= $level} {
			set node [$tree parent $node]
		    }
		    if {$level > [$tree get $node level]+1} {
			return -code error [
			    list {level more than one greater than parent level} \
				file $file linenum $linenum line $line]
		    }
		    set node [$tree insert $node end]
		} else {
		    set rejected 0
		    set node [$tree insert root end]
		    set node0 $node
		}
		$tree set $node file $fileidx
		$tree set $node line $line
		$tree set $node linenum $linenum
		$tree set $node level $level
		if {[catch {parseline $tree $node} cres copts]} {
		    set errorcode [dict get $copts -errorcode]
		    if {[lindex $errorcode 0] eq {fumagic} && [
			lindex $errorcode 1] eq {parse error}} {
			# don't delete the full node because the parts that
			# have been parsed so far might be useful
			#$tree delete $node0
			$tree delete $node
			set rejected 1
			puts stderr [list Rejected {bad parse}]
			puts stderr [dict get $copts -errorinfo]
			continue	;# skip erroring lines
		    } else {
			return -options $copts $cres
		    }

		}
	    }
   	}

   	# collect some summaries
   	::fileutil::magic::cfront::Debug {
   	    variable types
   	    set types($type) [$tree get $node type]
   	    variable quals
   	    set quals($qual) [$tree get $node qual]
   	}

   	#puts $linenum level:$level offset:$offset type:$type
	#puts qual:$qual compare:$compare val:'$val' desc:'$desc'

    }
}


proc ::fileutil::magic::cfront::rewind len {
    upvar node node tree tree
    set cursor [$tree get $node cursor]
    incr cursor -$len
    $tree set $node cursor $cursor
}


proc ::fileutil::magic::cfront::tclescape char {
	if {[string is space $char] || $char in [
	    list \# \{ \} \[  \] \" \$ \; \n]} {
	    append val \\
	}
	append val $char
	return $val
}


proc ::fileutil::magic::cfront::tree {} {
    set tree [::struct::tree]

    $tree set root path ""
    $tree set root otype Root
    $tree set root type root
    $tree set root named {}
    $tree set root message "unknown"
    return $tree
}


# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::cfront::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,

Changes to modules/fumagic/cgen.tcl.

53
54
55
56
57
58
59




60
61
62
63
64
65
66
    namespace export 2tree treedump treegen

   # Assumption : the parser folds the test inversion operator into equality and
   # inequality operators .
    variable offsetskey {
	type o rel ind ir it ioi ioo iir io compinvert mod mand
    }




}


# Optimisations:

# reorder tests according to expected or observed frequency this
# conflicts with reduction in strength optimisations.







>
>
>
>







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
    namespace export 2tree treedump treegen

   # Assumption : the parser folds the test inversion operator into equality and
   # inequality operators .
    variable offsetskey {
	type o rel ind ir it ioi ioo iir io compinvert mod mand
    }

    variable indent {}
    variable indents {}
    variable innamed 0
}


# Optimisations:

# reorder tests according to expected or observed frequency this
# conflicts with reduction in strength optimisations.
87
88
89
90
91
92
93




























94
95
96
97
98
99
100
#
# - String tests at same level over overlapping ranges can be
#   written as sub-string comparisons over the maximum range
#   this saves re-reading the same string from file.
#
# - common prefix strings will have to be guarded against, by
#   sorting string values, then sorting the tests in reverse length order.





























proc ::fileutil::magic::cgen::path {tree} {
    # Annotates the tree. In each node we store the path from the root
    # to this node, as list of nodes, with the current node the last
    # element. The root node is never stored in the path.

    $tree set root path {}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#
# - String tests at same level over overlapping ranges can be
#   written as sub-string comparisons over the maximum range
#   this saves re-reading the same string from file.
#
# - common prefix strings will have to be guarded against, by
#   sorting string values, then sorting the tests in reverse length order.


proc ::fileutil::magic::cgen::LessIndent {} {
    variable indent
    variable indents
    set size [expr {[string length $indent] - 1}]
    if {[dict exists $indents $size]} {
	set indent [dict get $indents $size]
    } else {
	set indent [string repeat \t $size]
	dict set indents $size $indent
    }
    return
}

proc ::fileutil::magic::cgen::MoreIndent {} {
    variable indent
    variable indents
    set size [expr {[string length $indent] + 1}]
    if {[dict exists $indents $size]} {
        set indent [dict get $indents $size]
    } else {
        set indent [string repeat \t $size]
        dict set indents $size $indent
    }
    return
}


proc ::fileutil::magic::cgen::path {tree} {
    # Annotates the tree. In each node we store the path from the root
    # to this node, as list of nodes, with the current node the last
    # element. The root node is never stored in the path.

    $tree set root path {}
128
129
130
131
132
133
134

135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150






151
152
153
154
155
156
157

    # generate a proc call type for the type, Numeric or String
    variable ::fileutil::magic::rt::typemap

    switch -glob -- $type {
   	*byte* -
	*double* -

   	*short* -
   	*long* -
	*quad* -
   	*date* {
   	    $tree set $node otype N
   	}
   	clear - default - search - regex - *string* {
   	    $tree set $node otype S
   	}
	name {
	    puts [list cromble otype [$tree getall $node]]
	    $tree set $node otype A
	}
	use {
	    $tree set $node otype U
	}






   	default {
   	    puts stderr "Unknown type: '$type'"
	    $tree set $node otype Unknown
   	}
    }

    # Stores the type determined above, and the arguments into







>






|



<





>
>
>
>
>
>







160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

    # generate a proc call type for the type, Numeric or String
    variable ::fileutil::magic::rt::typemap

    switch -glob -- $type {
   	*byte* -
	*double* -
	*float* -
   	*short* -
   	*long* -
	*quad* -
   	*date* {
   	    $tree set $node otype N
   	}
   	clear - search - regex - *string* {
   	    $tree set $node otype S
   	}
	name {

	    $tree set $node otype A
	}
	use {
	    $tree set $node otype U
	}
	default {
	    $tree set $node otype D
	}
	indirect {
	    $tree set $node otype T
	}
   	default {
   	    puts stderr "Unknown type: '$type'"
	    $tree set $node otype Unknown
   	}
    }

    # Stores the type determined above, and the arguments into
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
	tree_el $tree $child
    }
    optNum $tree root
    #optStr $tree root
    puts stderr "Script contains [llength [$tree children root]] discriminators"
    path $tree

    # Decoding the offsets, determination if we have to handle
    # relative offsets, and where. The less, the better.
    Offsets $tree

    return $tree
}

proc ::fileutil::magic::cgen::isStr {tree node} {
    return [expr {"S" eq [$tree get $node otype]}]
}








<
<
<
<







208
209
210
211
212
213
214




215
216
217
218
219
220
221
	tree_el $tree $child
    }
    optNum $tree root
    #optStr $tree root
    puts stderr "Script contains [llength [$tree children root]] discriminators"
    path $tree





    return $tree
}

proc ::fileutil::magic::cgen::isStr {tree node} {
    return [expr {"S" eq [$tree get $node otype]}]
}

246
247
248
249
250
251
252



253


254
255
256
257
258
259
260
}

proc ::fileutil::magic::cgen::isNum {tree node} {
    return [expr {"N" eq [$tree get $node otype]}]
}

proc ::fileutil::magic::cgen::switchNSort {tree n1 n2} {



    return [expr {[$tree get $n1 val] - [$tree get $n1 val]}]


}

proc ::fileutil::magic::cgen::optNum {tree node} {
    variable offsetskey
    array set offsets {}

    # traverse each numeric element of this node's children,







>
>
>
|
>
>







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
}

proc ::fileutil::magic::cgen::isNum {tree node} {
    return [expr {"N" eq [$tree get $node otype]}]
}

proc ::fileutil::magic::cgen::switchNSort {tree n1 n2} {

    # deal with the fact that [lsort] barfs if the result is larger than 32
    # bits
    set val1 [$tree get $n1 val]
    set val2 [$tree get $n2 val]
    expr {$val1 > $val2 ? 1 : $val1 < $val2 ? -1 : 0}
}

proc ::fileutil::magic::cgen::optNum {tree node} {
    variable offsetskey
    array set offsets {}

    # traverse each numeric element of this node's children,
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
	$tree set $switch path $path

	set level [$tree get [$tree parent $switch] level]
	$tree set $switch level [expr {$level+1}]
    }
}

proc ::fileutil::magic::cgen::Offsets {tree} {

    # Indicator if a node has to save field location information for
    # relative addressing. The 'kill' attribute is an accumulated
    # 'save' over the whole subtree. It will be used to determine when
    # level information was destroyed by subnodes and has to be
    # regenerated at the current level.

    $tree walk root -type dfs node {
	$tree set $node kill 0
	if {[$tree get $node otype] ne {Root} &&
	    ([$tree get $node rel] || [$tree get $node ir])} {
	    $tree set $node save 1
	} else {
	    $tree set $node save 0
	}
    }

    # We walk from the leafs up to the root, synthesizing the data
    # needed, as we go.
    $tree walk root -type dfs -order post node {
	if {$node eq {root}} continue

	# If the current node's parent is a switch, and the node has
	# to save, then the switch has to save. Because the current
	# node is not relevant during code generation anymore, the
	# switch is.

	if {[$tree get $node save]} {
	    # We save, therefore we kill.
	    $tree set $node kill 1
	    if {[$tree get [$tree parent $node] otype] eq {Switch}} {
		$tree set [$tree parent $node] save 1
	    }
	} else {
	    # We don't save i.e. kill, but we may inherit it from
	    # children which kill.

	    foreach c [$tree children $node] {
		if {[$tree get $c kill]} {
		    $tree set $node kill 1
		    break
		}
	    }
	}
    }
}


# Useful when debugging
proc ::fileutil::magic::cgen::stack {tree node} {
    set res {}
    set files [$tree get root files]
    while 1 {
	set s [dict create \







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







357
358
359
360
361
362
363
















































364
365
366
367
368
369
370
	$tree set $switch path $path

	set level [$tree get [$tree parent $switch] level]
	$tree set $switch level [expr {$level+1}]
    }
}


















































# Useful when debugging
proc ::fileutil::magic::cgen::stack {tree node} {
    set res {}
    set files [$tree get root files]
    while 1 {
	set s [dict create \
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
	    append result " " C([$tree get $node comp])
	}
	if {[$tree keyexists $node val]} {
	    append result " " V([$tree get $node val])
	}

	if {[$tree keyexists $node otype]} {
	    append result " " [$tree get $node otype]/[$tree get $node save]
	}

	if {$depth == 1} {
	    set msg [$tree get $node desc]
	    set n $node
	    while {($n != {}) && ($msg == "")} {
		set n [lindex [$tree children $n] 0]







|







412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
	    append result " " C([$tree get $node comp])
	}
	if {[$tree keyexists $node val]} {
	    append result " " V([$tree get $node val])
	}

	if {[$tree keyexists $node otype]} {
	    append result " " [$tree get $node otype]
	}

	if {$depth == 1} {
	    set msg [$tree get $node desc]
	    set n $node
	    while {($n != {}) && ($msg == "")} {
		set n [lindex [$tree children $n] 0]
446
447
448
449
450
451
452


453
454
455
456
457
458
459
460
461
462
463
464


465
466
467
468
469
470
471
472
473
474
475
476
477


478

479
480
481

482

483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512




513
514
515
516
517
518
519
520
521
522
523


524
525











526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545






546







547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571


572
573
574


575

576
577
578
579
580






581
582
583
584
585










586
587


588
589


590
591
592
593
594
595

596

597
598
599
600

601
602






603
604
605

606


607
608

609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625


626
627
628
629
630
631
632
633
634
635
636
637
638








639
640
641
642
643
644
645
646
647
648
	#append result " <" [$tree getall $node] >
	append result \n
    }
    return $result
}

proc ::fileutil::magic::cgen::treegen {tree node} {


    variable ::fileutil::magic::rt::typemap

    set result {} 
    set otype [$tree get $node otype]
    set level [$tree get $node level]

    set indent \n[string repeat \t [expr {$level > 0 ? $level-1 : 0}]]

    # Generate code for each node per its type.

    switch $otype {
	A {


	    set file [$tree get $node file]
	    set val [$tree get $node val]
	    if {[dict exists named $file$val]} {
		return -code error [list {name already exists} $file $val]
	    }
	    set aresult {}
	    foreach child [$tree children $node] {
		lappend aresult [treegen $tree $child]
	    }
	    set named [$tree get root named]
	    dict set named $file $val [join $aresult \n]
	    $tree set root named $named
	    return


	}

	U {
	    set file [$tree get $node file]
	    set val [$tree get $node val]

	    append result "U [list $file] [list $val]\n" 

	}
	N -
	S {
	    set names {type mod mand testinvert compinvert comp val desc kill save path}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }

	    set o [GenerateOffset $tree $node]

	    if {$val eq {}} {
		# If the value is the empty string, armor it.  Otherwise, it's
		# already been armored.
		set val [list $val]
	    }

	    if {$otype eq {N}} {
		if {$kill} {
		    # We have to save field data for relative adressing under this
		    # leaf.
		    set type [list Nx $type]
		} else {
		    # Regular fetching of information.
		    set type [list N $type]
		}
		# $type and $o are expanded via substitution 
		append result "${indent}if \{\[$type $o [list $testinvert] [
		    list $compinvert] [list $mod] [list $mand] [
		    list $comp] $val\]\} \{>\n"
	    } elseif {$otype eq {S}} {




		switch $comp {
		    == {set comp eq}
		    != {set comp ne}
		}
		if {$kill} {
		    set type [list Sx $type]
		} else {
		    set type [list S $type]
		}
		append result "${indent}if \{\[$type $o [list $testinvert] [
		    list $mod] [list $mand] [list $comp] $val\]\} \{>\n"


	    }












	    if {[$tree isleaf $node] && $desc ne {}} {
		append result "${indent}emit [list $desc]"
	    } else {
		if {$desc ne {}} {
		    append result "${indent}emit [list $desc]\n"
		}
		foreach child [$tree children $node] {
		    append result [treegen $tree $child]
		}
		#append result "\nreturn \$result"
	    }

	    if {[$tree keyexists $node ext_mime]} {
		append result "${indent}mime [$tree get $node ext_mime]\n"
	    }

	    if {[$tree keyexists $node ext_ext]} {
		append result "${indent}ext [$tree get $node ext_ext]\n"
	    }







	    append result "\n<\}\n"







	}
	Root {
	    foreach child [$tree children $node] {
		lappend result [treegen $tree $child]
		if {[lindex $result end] eq {}} {
		    set result [lreplace $result[set result {}] end end]
		}
	    }
	}
	Switch {
	    set names {o type compinvert mod mand kill save}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }
	    set o [GenerateOffset $tree $node]

	    if {$kill} {
		set fetch Nvx
	    } else {
		set fetch Nv
	    }

	    append fetch " $type $o [list $compinvert] [list $mod] [list $mand]"
	    append result "${indent}switch -- \[$fetch\] "



	    set scan [lindex $typemap($type) 1]

	    set ckilled 0


	    foreach child [$tree children $node] {

		# See ::fileutil::magic::rt::rtscan
		if {$scan eq {me}} {
		    set scan I
		}







		# get value in binary form, then back to numeric
		# this avoids problems with sign, as both values are
		# [binary scan]-converted identically
		binary scan [binary format $scan [$tree get $child val]] $scan val











		append result "$val \{>;"



		set desc [$tree get $child desc]
		if {[$tree isleaf $child] && $desc ne {}} {


		    append result "emit [list [$tree get $child desc]]"
		} else {
		    if {$desc ne {}} {
			append result "emit [list [$tree get $child desc]]\n"
		    }
		    foreach grandchild [$tree children $child] {

			append result [treegen $tree $grandchild]

		    }
		}
		if {[$tree keyexists $child ext_mime]} {
		    append result "${indent}mime [$tree get $child ext_mime]\n"

		}







		if {[$tree keyexists $child ext_ext]} {
		    append result "${indent}ext [$tree get $child ext_ext]\n"
		}




		append result ";<\} "
	    }

	    append result "\n"
	}
    }
    return $result
}

proc ::fileutil::magic::cgen::GenerateOffset {tree node} {
    # Examples:
    # direct absolute:     45      -> 45
    # direct relative:    &45      -> [R 45]
    # indirect absolute:  (45.s+1) -> [I 45 s + 0 1]
    # indirect absolute (indirect offset):  (45.s+(1)) -> [I 45 s + 1 1]
    # relative indirect absolute:  &(45.s+1) -> [R [I 45 s + 0 1]]
    # relative indirect absolute (indirect offset):  &(45.s+(1)) -> [R [I 45 s + 1 1]]
    # indirect relative: (&45.s+1) -> [I [R 45] s op 0 1]
    # relative indirect relative: &(&45.s+1) -> [R [I [R 45] s + 0 1]]
    # relative indirect relative: &(&45.s+(1)) -> [R [I [R 45] s + 1 1]]



    foreach v {o rel ind ir it ioi iir ioo io} {
	set $v [$tree get $node $v]
    }

    #foreach v {ind rel base itype iop ioperand iindir idelta} {
    #    set $v [$tree get $node $v]
    #}

    if {$ind} {
	if {$ir} {set o "\[R $o]"}
	set o "\[I $o [list $it] [list $ioi] [list $ioo] [list $iir] [list $io]\]"
    }








    if {$rel} {
	set o "\[R $o\]"
    }
    
    return $o
}

# ### ### ### ######### ######### #########
## Ready for use.
# EOF







>
>






<
<




>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
>



>
|
>

|
<
|












|
|
<
<
<
<
<

<
|
|
|
|
<
>
>
>
>
|
|
|
|
|
<
<

|
|
|
>
>
|

>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|

|
|
|

|
|
|

>
>
>
>
>
>
|
>
>
>
>
>
>
>










|





<
<
<
|
<


|

>
>
|

<
>
>
|
>
|
|
|
|

>
>
>
>
>
>
|
|
|
|

>
>
>
>
>
>
>
>
>
>
|

>
>
|
|
>
>
|
<
|
|
|
|
>
|
>
|
|
|
|
>
|

>
>
>
>
>
>
|
|
|
>
|
>
>
|
|
>
|
















>
>













>
>
>
>
>
>
>
>










437
438
439
440
441
442
443
444
445
446
447
448
449
450
451


452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482

483
484
485
486
487
488
489
490
491
492
493
494
495
496
497





498

499
500
501
502

503
504
505
506
507
508
509
510
511


512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580



581

582
583
584
585
586
587
588
589

590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628

629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
	#append result " <" [$tree getall $node] >
	append result \n
    }
    return $result
}

proc ::fileutil::magic::cgen::treegen {tree node} {
    variable indent
    variable innamed
    variable ::fileutil::magic::rt::typemap

    set result {} 
    set otype [$tree get $node otype]
    set level [$tree get $node level]



    # Generate code for each node per its type.

    switch $otype {
	A {
	    incr innamed
	    try  {
		set file [$tree get $node file]
		set val [$tree get $node val]
		if {[dict exists named $file$val]} {
		    return -code error [list {name already exists} $file $val]
		}
		set aresult {}
		foreach child [$tree children $node] {
		    lappend aresult [treegen $tree $child]
		}
		set named [$tree get root named]
		dict set named $file $val [join $aresult \n]
		$tree set root named $named
		return
	    } finally {
		incr innamed -1
	    }
	}
	U {
	    set file [$tree get $node file]
	    set val [$tree get $node val]
	    # generateOffset is expanded via subsitution
	    append result "${indent}U [list $file] [list $val] [
		GenerateOffset $tree $node]\n" 
	}
	N - S - D {

	    set names {type mod mand testinvert compinvert comp val desc path}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }

	    set o [GenerateOffset $tree $node]

	    if {$val eq {}} {
		# If the value is the empty string, armor it.  Otherwise, it's
		# already been armored.
		set val [list $val]
	    }

	    switch $otype {
		N {





		    set type [list N $type]

		    # $type and $o are expanded via substitution 
		    append result "${indent}if \{\[$type $o [list $testinvert] [
			list $compinvert] [list $mod] [list $mand] [
			list $comp] $val\]\} \{\n"

			MoreIndent
			append result "${indent}>\n"
		}
		S {
		    switch $comp {
			== {set comp eq}
			!= {set comp ne}
		    }



		    set type [list S $type]

		    append result "${indent}if \{\[$type $o [list $testinvert] [
			list $mod] [list $mand] [list $comp] $val\]\} \{\n"
			MoreIndent
			append result "${indent}>\n"
		}

		D {
		    set type [list D]
		    append result "${indent}if \{\[$type $o]\} \{\n" 
			MoreIndent
			append result "${indent}>\n"

		}
	    }

	    MoreIndent

		if {[$tree isleaf $node] && $desc ne {}} {
		    append result "${indent}emit [list $desc]\n"
		} else {
		    if {$desc ne {}} {
			append result "${indent}emit [list $desc]\n"
		    }
		    foreach child [$tree children $node] {
			append result [treegen $tree $child]\n
		    }
		    #append result "\nreturn \$result"
		}

		if {[$tree keyexists $node ext_mime]} {
		    append result "${indent}mime [list [$tree get $node ext_mime]]\n"
		}

		if {[$tree keyexists $node ext_ext]} {
		    append result "${indent}ext [list [$tree get $node ext_ext]]\n"
		}

		if {[$tree keyexists $node ext_strength]} {
		    append result "${indent}strength [list [$tree get $node ext_strength]]\n"
		}

	    LessIndent

	    append result ${indent}<\n
	    LessIndent
	    append result ${indent}\}\n
	}
	T {
	    set o [GenerateOffset $tree $node]
	    set mod [$tree get $node mod]
	    append result "${indent}T $o\n"
	}
	Root {
	    foreach child [$tree children $node] {
		lappend result [treegen $tree $child]
		if {[lindex $result end] eq {}} {
		    set result [lreplace $result[set result {}] end end]
		}
	    }
	}
	Switch {
	    set names {o type compinvert mod mand}
	    foreach name $names {
		set $name [$tree get $node $name]
	    }
	    set o [GenerateOffset $tree $node]




	    set fetch Nv


	    append fetch " $type $o [list $compinvert] [list $mod] [list $mand]"
	    append result "${indent}switch \[$fetch\] \{\n"

	    MoreIndent

		set scan [lindex $typemap($type) 1]


		foreach child [lsort -command [
		    list ::fileutil::magic::cgen::switchNSort $tree] [
			$tree children $node]] {

		    # See ::fileutil::magic::rt::rtscan
		    if {$scan eq {me}} {
			set scan I
		    }

		    set val [$tree get $child val]

		    if {[string match 0* $val]} {
			set val 0o$val
		    }

		    # get value in binary form, then back to numeric
		    # this avoids problems with sign, as both values are
		    # [binary scan]-converted identically
		    binary scan [binary format $scan $val] $scan val

		    if {[info exists lastval] && $lastval != $val} {
			LessIndent
			append result "${indent}\}\n"
		    } 

		    if {![info exists lastval] || $lastval != $val} {
			append result "${indent}$val \{\n"
			MoreIndent
		    }

		    append result "${indent}>\n"

		    MoreIndent

			set desc [$tree get $child desc]

			# emit, mime, and ext come first so that they are
			# picked up when child nodes produce results


			if {$desc ne {}} {
			    append result "${indent}emit [list $desc]\n"
			}

			if {[$tree keyexists $child ext_mime]} {
			    append result "${indent}mime [list [
				$tree get $child ext_mime]]\n"
			}

			if {[$tree keyexists $child ext_ext]} {
			    append result "${indent}ext [list [
				$tree get $child ext_ext]]\n"
			}

			if {![$tree isleaf $child]} {
			    foreach grandchild [$tree children $child] {
				append result [treegen $tree $grandchild]\n
			    }
			}
		    LessIndent

		    append result "${indent}<\n"

		    set lastval $val
		}

	    LessIndent
	    append result "${indent}\}\n"

	    LessIndent
	    append result "${indent}\}\n"
	}
    }
    return $result
}

proc ::fileutil::magic::cgen::GenerateOffset {tree node} {
    # Examples:
    # direct absolute:     45      -> 45
    # direct relative:    &45      -> [R 45]
    # indirect absolute:  (45.s+1) -> [I 45 s + 0 1]
    # indirect absolute (indirect offset):  (45.s+(1)) -> [I 45 s + 1 1]
    # relative indirect absolute:  &(45.s+1) -> [R [I 45 s + 0 1]]
    # relative indirect absolute (indirect offset):  &(45.s+(1)) -> [R [I 45 s + 1 1]]
    # indirect relative: (&45.s+1) -> [I [R 45] s op 0 1]
    # relative indirect relative: &(&45.s+1) -> [R [I [R 45] s + 0 1]]
    # relative indirect relative: &(&45.s+(1)) -> [R [I [R 45] s + 1 1]]

    variable innamed

    foreach v {o rel ind ir it ioi iir ioo io} {
	set $v [$tree get $node $v]
    }

    #foreach v {ind rel base itype iop ioperand iindir idelta} {
    #    set $v [$tree get $node $v]
    #}

    if {$ind} {
	if {$ir} {set o "\[R $o]"}
	set o "\[I $o [list $it] [list $ioi] [list $ioo] [list $iir] [list $io]\]"
    }

    # spec
    #   named instance direct offsets are relative to the offset of the
    #   previous matched entry
    if {$innamed} {
	set o "\[O $o]"
    }

    if {$rel} {
	set o "\[R $o\]"
    }
    
    return $o
}

# ### ### ### ######### ######### #########
## Ready for use.
# EOF

Changes to modules/fumagic/filetypes.tcl.

more than 10,000 changes

Changes to modules/fumagic/filetypes.test.

30
31
32
33
34
35
36

37
38
39
40
41

42
43
44
45
46
47
48
49

50
51
52
53
54
55
56

57
58
59
60
61
62
63

64
65
66
67
68
69
70

71
72
73
74
75
76
77

78
79
80
81
82
83
84

85
86
87
88
89
90
91

92
93
94
95
96
97
98

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119
120
121
122
123
124

125
126
127
128
129
130
131
132

133
134
135
136
137
138
139

140
141
142
143
144
145
146

147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

164
165
166
167
168
169
170

171
172
173
174
175
176
177

178
179
180
181
182
183
184

185
186
187
188
189
190
191

192
193
194
195
196
197
198

199
200
201
202
203
204
205

206
207
208
209
210
211
212
213

214
215
216
217
218
219
220


221
222
223
224
225
226
227

228
229
230
231
232
233
234
235










































236
237
238
239
240
241
242
243
244
245

246
247
248
249
250
251
252
253
254









255
256
257
}

# -------------------------------------------------------------------------
# Now the package specific tests....

set path [makeFile {} bogus]
removeFile bogus


test fumagic.filetype-1.1 {test file non-existance} {
    set res [catch {fileutil::magic::filetype $path} msg]
    list $res $msg
} [list 1 "file not found: \"$path\""]


test fumagic.filetype-1.2 {test file directory} {
    set f [makeDirectory fileTypeTest]
    set res [catch {fileutil::magic::filetype $f} msg]
    regsub {file[0-9]+} $msg {fileXXX} msg
    removeDirectory fileTypeTest
    list $res $msg
} {0 {directory application/x-directory {}}}


test fumagic.filetype-1.3 {test file empty} {
    set f [makeEmptyFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEmptyFile
    list $res $msg
} {0 {}}


test fumagic.filetype-1.4 {test simple binary} {
    set f [makeBinFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBinFile
    list $res $msg
} {0 {}}


test fumagic.filetype-1.5 {test elf executable} {
    set f [makeElfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeElfFile
    list $res $msg
} {0 {{ELF 32-bit LSB executable, (SYSV)} {application x-executable} {}}}


test fumagic.filetype-1.6 {test simple text} {
    set f [makeTextFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTextFile
    list $res $msg
} {0 {}}


test fumagic.filetype-1.7 {test script file} {
    set f [makeScriptFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeScriptFile
    list $res $msg
} {0 {{a {/bin/tclsh script text executable}} {} {}}}


test fumagic.filetype-1.8 {test html text} {
    set f [makeHtmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeHtmlFile
    list $res $msg
} {0 {{{HTML document text}} {text html} {}}}


test fumagic.filetype-1.9 {test xml text} {
    set f [makeXmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlFile
    list $res $msg
} {0 {{XML {1.0 document text}} {text xml} {}}}


test fumagic.filetype-1.10 {test xml with dtd text} {
    set f [makeXmlDTDFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlDTDFile
    list $res $msg
} {0 {{XML {1.0 document text}} {text xml} {}}}


test fumagic.filetype-1.11 {
	test PGP message. Their are multiple matches, and the longest match should
	carry greater weight, and thus be the one returned.  If the match is "PGP
	armored data message", this isn't happening.
} {
    set f [makePGPFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePGPFile
    list $res $msg
} {0 {{{PGP message}} {application pgp} {}}}


test fumagic.filetype-1.12.0 {test binary graphic jpeg} {
    set f [makeJpegFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpegFile
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16}} {image jpeg} {jpeg jpg jpe jfif}}}


#the result should actually be 128x112, but current magic files indicate "byte" instead of "ubyte"
test fumagic.filetype-1.12.1 {test binary graphic jpeg} {
    set f [makeJpeg2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpeg2File
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16, thumbnail -128x112}} {image jpeg} {jpeg jpg jpe jfif}}}


test fumagic.filetype-1.13 {test binary graphic gif} {
    set f [makeGifFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGifFile
    list $res $msg
} {0 {{{GIF image data, version 89a,} {43 x} 64} {image gif} {}}}


test fumagic.filetype-1.14 {test binary graphic png} {
    set f [makePngFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePngFile
    list $res $msg
} {0 {{{PNG image data, 0 x} 0, 0-bit} {image png} {}}}


#{To do} {implement a "wild guess" mode}
#test fumagic.filetype-1.14.1 {test binary graphic png} {
#    set f [makePngFile]
#    set res [catch {fileutil::magic::filetype $f} msg]
#    removePngFile
#    list $res $msg
#} {0 {PNG image data, CORRUPTED, PNG image data, CORRUPTED}}

# The file doesn't really provide a direntries value, so not sure what the
# result means here, but any number is good enough for this test.
test fumagic.filetype-1.15 {test binary graphic tiff} {
    set f [makeTiffFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTiffFile
    list $res $msg
} {0 {{{TIFF image data, big-endian, direntries=19789}} {image tiff} {}}}


test fumagic.filetype-1.16 {test binary pdf} {
    set f [makePdfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdfFile
    list $res $msg
} {0 {{{PDF document, version 1.2}} {application pdf} {}}}


test fumagic.filetype-1.17 {test text ps} {
    set f [makePSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePSFile
    list $res $msg
} {0 {{{PostScript document text}} {application postscript} {}}}


test fumagic.filetype-1.18 {test text eps} {
    set f [makeEPSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEPSFile
    list $res $msg
} {0 {{{PostScript document text}} {application postscript} {}}}


test fumagic.filetype-1.19 {test binary gravity_wave_data_frame} {
    set f [makeIgwdFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeIgwdFile
    list $res $msg
} {0 {}}


test fumagic.filetype-1.20 {test binary compressed bzip} {
    set f [makeBzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBzipFile
    list $res $msg
} {0 {{{bzip2 compressed data, block size = 900k}} {application x-bzip2} {}}}


test fumagic.filetype-1.21 {test binary compressed gzip} {
    set f [makeGzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGzipFile
    list $res $msg
} {0 {{{gzip compressed data, reserved method, ASCII, last modified: 1}} {application x-gzip} {}}}


test fumagic.filetype-1.22 {test pstring} {
    set f [makeWsdlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeWsdlFile
    list $res $msg
} {0 {{{PHP WSDL cache,} {version 0x03, created 7, uri: "hello", source: "some source", target_ns: "and a target"}} {} {}}}
 

test fumagic.filetype-1.23 {regular expressions} {
    set f [makeCSourceFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeCSourceFile
    list $res $msg
} {0 {{{C source text}} {text x-c} {}}}



test fumagic.filetype-1.24 {ustring} {
    set f [makeXzFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXzFile
    list $res $msg
} {0 {{{XZ compressed data}} {application x-xz} {}}}


test fumagic.filetype-1.25 {
    tests negative relative offsets 
} {
    set f [makePdf2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdf2File
    list $res $msg
} {0 {{{PDF document, version 1.3}} {application pdf} {}}}











































test fumagic.filetype-1.26 {
    Tests comparisons against the empty string when a file is malformed or
    missing data at specified offsets.
} {
    set f [makePeFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePeFile
    list $res $msg
} {0 {{{MS-DOS executable}} {application x-dosexec} {}}}


test fumagic.filetype-1.27 {
    Tests indirect offsets, as well as the "default" test type. 
} {
    set f [makePe2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePe2File
    list $res $msg
} {0 {{{PE32 executable} (GUI) {Intel 80386, for MS Windows}} {application x-dosexec} {}}}










testsuiteCleanup
return







>





>








>






|
>






|
>






|
>






|
>







>







>







>



















>







>








>







>







>









|
<





|
>







>







>







>






|
>







>






|
>








>







>
>







>

|






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










>








|
>
>
>
>
>
>
>
>
>



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
}

# -------------------------------------------------------------------------
# Now the package specific tests....

set path [makeFile {} bogus]
removeFile bogus


test fumagic.filetype-1.1 {test file non-existance} {
    set res [catch {fileutil::magic::filetype $path} msg]
    list $res $msg
} [list 1 "file not found: \"$path\""]


test fumagic.filetype-1.2 {test file directory} {
    set f [makeDirectory fileTypeTest]
    set res [catch {fileutil::magic::filetype $f} msg]
    regsub {file[0-9]+} $msg {fileXXX} msg
    removeDirectory fileTypeTest
    list $res $msg
} {0 {directory application/x-directory {}}}


test fumagic.filetype-1.3 {test file empty} {
    set f [makeEmptyFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEmptyFile
    list $res $msg
} {0 {empty {} {}}}


test fumagic.filetype-1.4 {test simple binary} {
    set f [makeBinFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBinFile
    list $res $msg
} {0 {{} {} {}}}


test fumagic.filetype-1.5 {test elf executable} {
    set f [makeElfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeElfFile
    list $res $msg
} {0 {{ELF 32-bit LSB executable, {*unknown arch 0x0*} (SYSV)} {application x-executable} {}}}


test fumagic.filetype-1.6 {test simple text} {
    set f [makeTextFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTextFile
    list $res $msg
} {0 {{} {} {}}}


test fumagic.filetype-1.7 {test script file} {
    set f [makeScriptFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeScriptFile
    list $res $msg
} {0 {{a {/bin/tclsh script text executable}} {} {}}}


test fumagic.filetype-1.8 {test html text} {
    set f [makeHtmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeHtmlFile
    list $res $msg
} {0 {{{HTML document text}} {text html} {}}}


test fumagic.filetype-1.9 {test xml text} {
    set f [makeXmlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlFile
    list $res $msg
} {0 {{XML {1.0 document text}} {text xml} {}}}


test fumagic.filetype-1.10 {test xml with dtd text} {
    set f [makeXmlDTDFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXmlDTDFile
    list $res $msg
} {0 {{XML {1.0 document text}} {text xml} {}}}


test fumagic.filetype-1.11 {
	test PGP message. Their are multiple matches, and the longest match should
	carry greater weight, and thus be the one returned.  If the match is "PGP
	armored data message", this isn't happening.
} {
    set f [makePGPFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePGPFile
    list $res $msg
} {0 {{{PGP message}} {application pgp} {}}}


test fumagic.filetype-1.12.0 {test binary graphic jpeg} {
    set f [makeJpegFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpegFile
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16}} {image jpeg} {jpeg jpg jpe jfif}}}


#the result should actually be 128x112, but current magic files indicate "byte" instead of "ubyte"
test fumagic.filetype-1.12.1 {test binary graphic jpeg} {
    set f [makeJpeg2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeJpeg2File
    list $res $msg
} {0 {{{JPEG image data, JFIF standard 1.02, resolution (DPI), density 300x316, segment length 16, thumbnail -128x112}} {image jpeg} {jpeg jpg jpe jfif}}}


test fumagic.filetype-1.13 {test binary graphic gif} {
    set f [makeGifFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGifFile
    list $res $msg
} {0 {{{GIF image data, version 89a,} {43 x} 64} {image gif} {}}}


test fumagic.filetype-1.14 {test binary graphic png} {
    set f [makePngFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePngFile
    list $res $msg
} {0 {{{PNG image data, 0 x} 0, 0-bit} {image png} {}}}


#{To do} {implement a "wild guess" mode}
#test fumagic.filetype-1.14.1 {test binary graphic png} {
#    set f [makePngFile]
#    set res [catch {fileutil::magic::filetype $f} msg]
#    removePngFile
#    list $res $msg
#} {0 {PNG image data, CORRUPTED, PNG image data, CORRUPTED}}



test fumagic.filetype-1.15 {test binary graphic tiff} {
    set f [makeTiffFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeTiffFile
    list $res $msg
} {0 {{{TIFF image data, big-endian, direntries=0}} {image tiff} {}}}


test fumagic.filetype-1.16 {test binary pdf} {
    set f [makePdfFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdfFile
    list $res $msg
} {0 {{{PDF document, version 1.2}} {application pdf} {}}}


test fumagic.filetype-1.17 {test text ps} {
    set f [makePSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePSFile
    list $res $msg
} {0 {{{PostScript document text}} {application postscript} {}}}


test fumagic.filetype-1.18 {test text eps} {
    set f [makeEPSFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeEPSFile
    list $res $msg
} {0 {{{PostScript document text}} {application postscript} {}}}


test fumagic.filetype-1.19 {test binary gravity_wave_data_frame} {
    set f [makeIgwdFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeIgwdFile
    list $res $msg
} {0 {{} {} {}}}


test fumagic.filetype-1.20 {test binary compressed bzip} {
    set f [makeBzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeBzipFile
    list $res $msg
} {0 {{{bzip2 compressed data, block size = 900k}} {application x-bzip2} {}}}


test fumagic.filetype-1.21 {test binary compressed gzip} {
    set f [makeGzipFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeGzipFile
    list $res $msg
} {0 {{{gzip compressed data, reserved method, ASCII, original size 16878367}} {application x-gzip} {}}}


test fumagic.filetype-1.22 {test pstring} {
    set f [makeWsdlFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeWsdlFile
    list $res $msg
} {0 {{{PHP WSDL cache,} {version 0x03, created 7, uri: "hello", source: "some source", target_ns: "and a target"}} {} {}}}
 

test fumagic.filetype-1.23 {regular expressions} {
    set f [makeCSourceFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeCSourceFile
    list $res $msg
} {0 {{{C source text}} {text x-c} {}}}


# XZ is the one format whose magic record is of type "ustring"
test fumagic.filetype-1.24 {ustring} {
    set f [makeXzFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removeXzFile
    list $res $msg
} {0 {{{XZ compressed data}} {application x-xz} {}}}


test fumagic.filetype-1.25 {
    tests negative relative offsets
} {
    set f [makePdf2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePdf2File
    list $res $msg
} {0 {{{PDF document, version 1.3}} {application pdf} {}}}


test fumagic.filetype-1.25.1 {
    matches and strengths
} {
    set f [makePdf2File]

    set chan [open $f]

    set matches {}

    try {
        file stat $f stats
        set finfo [array get stats]
        dict set finfo name $f

        set coro [coroutine [info cmdcount] \
            ::fileutil::magic::rt::new $finfo $chan \
		$::fileutil::magic::filetype::named [
        	list [namespace which ::fileutil::magic::filetype::analyze]]]
        while 1 {
            lassign [$coro] weight result mimetype ext 
            dict update matches $weight weight {
        	lappend weight [list $result $mimetype $ext]
            }
        }
    } finally {
        close $chan
    }

    removePdf2File
    return $matches
} [list \
    5.0  [
	list [
	    list [
		list {tar archive (V7), type} '\0' \
		    {%PDF-1.3, mode 5 , uid ndobj, gid  xref, size 870 00000 n, seconds  xref, linkname  xref, comment: }
	] {application x-tar} tar]
] \
    66.0 {{{{PDF document, version 1.3}} {application pdf} {}} {{{PDF document, version 1.3}} {application pdf} {}}}]


test fumagic.filetype-1.26 {
    Tests comparisons against the empty string when a file is malformed or
    missing data at specified offsets.
} {
    set f [makePeFile]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePeFile
    list $res $msg
} {0 {{{MS-DOS executable}} {application x-dosexec} {}}}


test fumagic.filetype-1.27 {
    Tests indirect offsets, as well as the "default" test type. 
} {
    set f [makePe2File]
    set res [catch {fileutil::magic::filetype $f} msg]
    removePe2File
    list $res $msg
} {0 {{{PE32 executable} {Unknown PE signature} 0x10ba (GUI) {Intel 80386, for MS Windows}} {application x-dosexec} {}}}

if 0 {
	to do

	ebml and webm both have a belong at 440786851

		make sure this is handled correctly
}


testsuiteCleanup
return

Changes to modules/fumagic/fumagic.testsupport.

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
	Bin    "\u0000" \
	Elf    [cat "\x7F" "ELF" "\x01\x01\x01\x00\x00" "\x00\x00\x00\x00\x00\x00\x00" "\x02\x00"] \
	Bzip   "BZh91AY&SY\x01\x01\x01\x00\x00" \
	Gzip   "\x1f\x8b\x01\x01\x01\x00\x00" \
	Jpeg   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c"] \
	Jpeg2   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c\x80\x70"] \
	Gif    "GIF89a\x2b\x00\x40\x00\xf7\xff\x00" \
	Png    "\x89PNG\x0D\x0A\x1A\x0A" \
	PngMalformed "\x89PNG\x00\x01\x02\x01\x01\x2c" \
	Tiff   "MM\x00\*\x00\x01\x02\x01\x01\x2c" \
	Pdf    "%PDF-1.2 \x00\x01\x02\x01\x01\x2c" \
	Pdf2   {%PDF-1.3 %âãÏÓ
25 0 obj <<  /Linearized 1  /O 29  /H [ 1948 443 ]  /L 64573  /E 41907  /N 3  /T 63955  >>  endobj                                                           xref 25 67  0000000016 00000 n
0000001687 00000 n
0000001800 00000 n







|







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
	Bin    "\u0000" \
	Elf    [cat "\x7F" "ELF" "\x01\x01\x01\x00\x00" "\x00\x00\x00\x00\x00\x00\x00" "\x02\x00"] \
	Bzip   "BZh91AY&SY\x01\x01\x01\x00\x00" \
	Gzip   "\x1f\x8b\x01\x01\x01\x00\x00" \
	Jpeg   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c"] \
	Jpeg2   [cat "\xFF\xD8\xFF\xE0\x00\x10JFIF" "\x00\x01\x02\x01\x01\x2c\x01\x3c\x80\x70"] \
	Gif    "GIF89a\x2b\x00\x40\x00\xf7\xff\x00" \
	Png    "\x89PNG\x0D\x0A\x1A\x0A\x00\x00\x00\x0DIHDR" \
	PngMalformed "\x89PNG\x00\x01\x02\x01\x01\x2c" \
	Tiff   "MM\x00\*\x00\x01\x02\x01\x01\x2c" \
	Pdf    "%PDF-1.2 \x00\x01\x02\x01\x01\x2c" \
	Pdf2   {%PDF-1.3 %âãÏÓ
25 0 obj <<  /Linearized 1  /O 29  /H [ 1948 443 ]  /L 64573  /E 41907  /N 3  /T 63955  >>  endobj                                                           xref 25 67  0000000016 00000 n
0000001687 00000 n
0000001800 00000 n

Changes to modules/fumagic/rtcore.man.

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

58
59
60


61

62
63
64
65


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108


109
110
111
112
113
114
115
116
117
118

119
120
121

122
123

124

125
126
127
128
129
130
131


132
133
134
135
136
137
138
139
140
141

142
143
144


145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

204
205
206
207
208
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
[require Tcl 8.5]
[require fileutil::magic::rt [opt [vset VERSION]]]
[description]
[para]

This package provides the runtime core for file type recognition
engines written in pure Tcl and is thus used by all other packages in
this module, i.e. the two frontend packages
[package fileutil::magic::mimetypes] and

[package fileutil::magic::filetypes], and the two engine compiler
packages [package fileutil::magic::cgen] and
[package fileutil::magic::cfront].

[section COMMANDS]

[list_begin definitions]


[call [cmd ::fileutil::magic::rt::>]] 

Shorthand for [cmd {incr level}].

[call [cmd ::fileutil::magic::rt::<]] 

Shorthand for [cmd {incr level -1}].

[call [cmd ::fileutil::magic::rt::open] [arg filename]]

This command initializes the runtime and prepares the file
[arg filename] for use by the system.

This command has to be invoked first, before any other command of this
package.

[para]

The command returns the channel handle of the opened file as its
result.

[call [cmd ::fileutil::magic::rt::close]]

This command closes the last file opened via
[cmd ::fileutil::magic::rt::open] and shuts the runtime down.


This command has to be invoked last, after the file has been dealt
with completely.




Afterward another invokation of [cmd ::fileutil::magic::rt::open]  is
required to process another file.

[para]



This command returns the empty string as its result.

[call [cmd ::fileutil::magic::rt::file_start] [arg name]]

This command marks the start of a magic file when debugging. It
returns the empty string as its result.

[call [cmd ::fileutil::magic::rt::result] [opt [arg msg]]]

This command returns the current result and stops processing.

[para]

If [arg msg] is specified its text is added to the result before it is
returned. See [cmd ::fileutil::magic::rt::emit] for the allowed
special character sequences.

[call [cmd ::fileutil::magic::rt::resultv] [opt [arg msg]]]

This command returns the current result.

In contrast to [cmd ::fileutil::magic::rt::result] processing
continues.

[para]

If [arg msg] is specified its text is added to the result before it is
returned. See [cmd ::fileutil::magic::rt::emit] for the allowed
special character sequences.

[call [cmd ::fileutil::magic::rt::emit] [arg msg]]

This command adds the text [arg msg] to the result buffer. The
message may contain the following special character sequences. They
will be replaced with buffered values before the message is added to
the result. The command returns the empty string as its result.

[list_begin definitions]
[def [const \\b]] This sequence is removed
[def [const %s]]  Replaced with the last buffered string value.
[def [const %ld]] Replaced with the last buffered numeric value.
[def [const %d]]  See above.


[list_end]

[comment [call [cmd ::fileutil::magic::rt::offset] [arg where]]]
[comment {
	Handling of complex offsets. Currently not implemented.
	Always returns zero.
}]

[call [cmd ::fileutil::magic::rt::Nv] [arg type] [arg offset] [opt [arg qual]]]


This command fetches the numeric value with [arg type] from the
absolute location [arg offset] and returns it as its result. The
fetched value is further stored in the numeric buffer.


[para]



If [arg qual] is specified it is considered to be a mask and applied
to the fetched value before it is stored and returned. It has to have
the form of a partial Tcl bit-wise expression, i.e.

[example {
	& number
}]



For example:

[example {
	Nv lelong 0 &0x8080ffff
}]

For the possible types see section [sectref {NUMERIC TYPES}].

[call [cmd ::fileutil::magic::rt::N] [arg type] [arg offset] [arg comp] [arg val] [opt [arg qual]]]


This command behaves mostly like [cmd ::fileutil::magic::rt::Nv],
except that it compares the fetched and masked value against [arg val]


as specified with [arg comp] and returns the result of that
comparison.

[para]

The argument [arg comp] has to contain one of Tcl's comparison
operators, and the comparison made will be

[example {
	<val> <comp> <fetched-and-masked-value>
}]

[para]

The special comparison operator [const x] signals that no comparison
should be done, or, in other words, that the fetched value will always
match [arg val].

[call [cmd ::fileutil::magic::rt::Nvx] [arg type] [arg offset] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::Nv], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current level, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::Nx] [arg type] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::S] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::N], except that
it fetches and compares strings, not numeric data. The fetched value
is also stored in the internal string buffer instead of the numeric
buffer.

[call [cmd ::fileutil::magic::rt::Sx] [arg offset] [arg comp] [arg val] [opt [arg qual]]]

This command behaves like [cmd ::fileutil::magic::rt::S], except that
it additionally remembers the location in the file after the fetch in
the calling context, for the current level, for later use by
[cmd ::fileutil::magic::rt::R].

[call [cmd ::fileutil::magic::rt::L] [arg newlevel]]

This command sets the current level in the calling context to
[arg newlevel]. The command returns the empty string as its result.

[call [cmd ::fileutil::magic::rt::I] [arg base] [arg type] [arg delta]]

This command handles base locations specified indirectly through the
contents of the inspected file. It returns the sum of [arg delta] and
the value of numeric [arg type] fetched from the absolute location
[arg base].

[para]


For the possible types see section [sectref {NUMERIC TYPES}].

[call [cmd ::fileutil::magic::rt::R] [arg offset]]

This command handles base locations specified relative to the end of

the last field one level above.

[para]

In other words, the command computes an absolute location in the file
based on the relative [arg offset] and returns it as its result. The
base the offset is added to is the last location remembered for the
level in the calling context.

[call [cmd ::fileutil::magic::rt::U] [arg fileindex] [arg name]]

Use a named test script at the current level.

[list_end]

[section {NUMERIC TYPES}]

[list_begin definitions]
[def [const byte]]    8-bit integer







<
<
<
|






>



<
|
<

<

|

<
<
|
<
<

<

<
<
<
|

|
<
>

<
<
>
>

>
|
<

<
>
>

<






<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<













>
>


<
<
<
<
<

|

>
|
|
<
>

<
>

>
|
<
<

<
<
<
>
>

<

<
<
<

<
<
|
>

<
<
>
>
|
<



|
|


|








<

<
<
<
|
|
<

<
<
<
|
<
<
<
<
|
<
<

<

<
<
<
<
<


|


|
<
<
<
<
|

<
>

<



|
>
|



<
<
<
<



|







15
16
17
18
19
20
21



22
23
24
25
26
27
28
29
30
31
32

33

34

35
36
37


38


39

40



41
42
43

44
45


46
47
48
49
50

51

52
53
54

55
56
57
58
59
60






















61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77





78
79
80
81
82
83

84
85

86
87
88
89


90



91
92
93

94



95


96
97
98


99
100
101

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118



119
120

121



122




123


124

125





126
127
128
129
130
131




132
133

134
135

136
137
138
139
140
141
142
143
144




145
146
147
148
149
150
151
152
153
154
155
[require Tcl 8.5]
[require fileutil::magic::rt [opt [vset VERSION]]]
[description]
[para]

This package provides the runtime core for file type recognition
engines written in pure Tcl and is thus used by all other packages in



this module such as [package fileutil::magic::filetype] and the two compiler
packages [package fileutil::magic::cgen] and
[package fileutil::magic::cfront].

[section COMMANDS]

[list_begin definitions]


[call [cmd ::fileutil::magic::rt::>]] 


Increment the level and perform related housekeeping




[call [cmd ::fileutil::magic::rt::<]] 



Decrement the level and perform related housekeeping








[call [cmd ::fileutil::magic::rt::new] [arg chan] [arg named] [arg analyze]]

Create a new command which returns one description of the file each time it is

called, and a code of [arg break] when there are no more descriptions.



[arg chan] is the channel containing the data to describe.  The channel
configuration is then managed as needed.

[arg named] is a dictionary of named tests, as generated by
[cmd fileutil::magic::cfront::compile].



[arg test] is a command prefix for a routine composed of the list of commands
as returned by [cmd fileutil::magic::cfront::compile].



[call [cmd ::fileutil::magic::rt::file_start] [arg name]]

This command marks the start of a magic file when debugging. It
returns the empty string as its result.
























[call [cmd ::fileutil::magic::rt::emit] [arg msg]]

This command adds the text [arg msg] to the result buffer. The
message may contain the following special character sequences. They
will be replaced with buffered values before the message is added to
the result. The command returns the empty string as its result.

[list_begin definitions]
[def [const \\b]] This sequence is removed
[def [const %s]]  Replaced with the last buffered string value.
[def [const %ld]] Replaced with the last buffered numeric value.
[def [const %d]]  See above.
[def [const {${x:...?...}}]] Substitute one string if the file is executable, and
another string otherwise.
[list_end]







[call [cmd ::fileutil::magic::rt::O] [arg where]]

Produce an offset from [arg where], relative to the cursor one level up.



[comment [call [cmd ::fileutil::magic::rt::R] [arg where]]]


Produce an offset from [arg where], relative to the offset one level up.

[call [cmd ::fileutil::magic::rt::Nv] [arg type] [arg offset] [
    arg compinvert] [arg comp] [arg expected]]






A limited form of [cmd ::fileutile::magic::rt::N] that only checks for
equality and can't be told to invert the test.









[call [cmd ::fileutil::magic::rt::N] [arg type] [arg offset] [arg testinvert] [
    arg compinvert] [arg mod] [arg mand] [arg comp] [arg expected]]



Fetch the numeric value with [arg type] from the absolute location
[arg offset], compare it with [arg expected] using [arg comp] as the comparision
operator,  and returns the result.


[para]

The argument [arg comp] must be one of Tcl's comparison
operators.

[example {
	<comp> <fetched-and-masked-value> <comp> <expected>
}]

[para]

The special comparison operator [const x] signals that no comparison
should be done, or, in other words, that the fetched value will always
match [arg val].






[call [cmd ::fileutil::magic::rt::S] [arg type] [arg offset] [arg testinvert] [
    arg mod] [arg mand] [arg comp] [arg val]]





Like [cmd ::fileutil::magic::rt::N] except that it fetches and compares string




types , not numeric data.










[call [cmd ::fileutil::magic::rt::L] [arg newlevel]]

Sets the current level in the calling context to
[arg newlevel]. The command returns the empty string as its result.

[call [cmd ::fileutil::magic::rt::I] [arg offset] [arg it] [arg ioi] [arg ioo] [




    arg iir] [arg io]]


Calculates an offset based on an initial offset and the provided modifiers.



[call [cmd ::fileutil::magic::rt::R] [arg offset]]

Given an initial offset, calculates an offset relative to the cursor at the
next level up. The cursor is the position in the data one character after the
data extracted from the file one level up.

[para]






[call [cmd ::fileutil::magic::rt::U] [arg fileindex] [arg name]]

Add a level and use a named test script.

[list_end]

[section {NUMERIC TYPES}]

[list_begin definitions]
[def [const byte]]    8-bit integer

Changes to modules/fumagic/rtcore.tcl.

1
2
3
4
5
6
7

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57


58

59




60
61






62
63















64
65
66



67
68
69
70



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

121

122

123







124
125

126









127











128
129








130
131
132
133
134
135
136
137
138
139
140
141

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185














186



187
188


189
190




191

192



193



194



195
196


197
198



















199
200
201
202





203














204
205
206

207
208
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# rtcore.tcl --
#
#	Runtime core for file type recognition engines written in pure Tcl.
#
# Copyright (c) 2016-2017 Poor Yorick     <[email protected]>
# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>

#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: rtcore.tcl,v 1.5 2005/09/28 04:51:19 andreas_kupries Exp $

#####
#
# "mime type recognition in pure tcl"
# http://wiki.tcl.tk/12526
#
# Tcl code harvested on:  10 Feb 2005, 04:06 GMT
# Wiki page last updated: ???
#
#####

#TODO  {
#    {Required Functionality} {
#	{implement full offset language} {
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#	}
#

#	{implement pstring (pascal string, blerk)} {
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#}
#
#	{implement regex form (blerk!)} {
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#	}


#	{implement string qualifiers} {
#	    done
#	    
#	    by pooryorick
#
#	    time {2016 06}
#	}
#


#	{finish implementing the indirect type}

#




#	{Maybe distinguish between binary and text tests, like file(n)}
#	






#	{process and use strength directives}
#















#    }
#}




# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.5




# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::rt {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0

    # The maximum size of a substring to inspect from the file in question 
    variable maxstring 64

    # The maximum length of any %s substitution in a resulting description is
    variable maxpstring 64

    variable regexdefaultlen 4096

    # Runtime state.

    variable cursor 0      ; # The current offset
    variable fd     {}     ; # Channel to file under scrutiny
    variable found 0       ; # Whether the last test produced a match
    variable lfound {}     ; # For each level, whether a match was found
    variable level 0
    variable strbuf {}     ; # Input cache [*].
    variable cache         ; # Cache of fetched and decoded numeric
    array set cache {}	   ; # values.
    variable result {}     ; # Accumulated recognition result.
    variable extracted     ; # The value extracted for inspection
    variable  last         ; # Behind last fetch locations,
    array set last {}      ; # per nesting level.
    variable weight 0      ; # The weight of the current part. 
                           ; # Basically string length of the contributing of
			   ; # the potentially-matching part.

    variable weighttotal 0 ; # The aggregate weight of the matching components of
			   ; # the current test.

    # [*] The vast majority of magic strings are in the first 4k of the file.

    # Export APIs (full public, recognizer public)
    namespace export open close file_start result
    namespace export emit ext mime offset Nv N S Nvx Nx Sx L R I resultv U < >
}

# ### ### ### ######### ######### #########
## Public API, general use.


proc ::fileutil::magic::rt::> {} {

    variable level

    incr level







}


proc ::fileutil::magic::rt::< {} {









    variable level











    incr level -1
}









proc ::fileutil::magic::rt::classify {data} {
    set bin_rx {[\x00-\x08\x0b\x0e-\x1f]}
    if {[regexp $bin_rx $data] } {
        return binary
    } else {
        return text
    }
}

proc ::fileutil::magic::rt::mime value {
    upvar 1 mime mime

    set mime $value
}

proc ::fileutil::magic::rt::ext value {
    upvar 1 ext ext
    set ext $value
}


# open the file to be scanned
proc ::fileutil::magic::rt::open {file} {
    variable result {}
    variable extracted {} 
    variable strbuf
    variable fd
    variable cache

    set fd [::open $file]
    ::fconfigure $fd -translation binary
        
    # fill the string cache
    set strbuf [::read $fd 4096]
	set class [classify $strbuf]

    # clear the fetch cache
    catch {unset cache}
    array set cache {}

    return $fd
}


proc ::fileutil::magic::rt::close {} {
    variable fd
    ::close $fd
    return
}

# mark the start of a magic file in debugging
proc ::fileutil::magic::rt::file_start {name} {
    ::fileutil::magic::rt::Debug {puts stderr "File: $name"}
}
















# return the emitted result



proc ::fileutil::magic::rt::result {{msg {}}} {
    variable lfound {}


    variable found
    variable result




    variable weight

    variable weighttotal



    if {$msg ne {}} {emit $msg}



    set res [list $found $weighttotal $result]



    set found 0
    set weight 0


    set weighttotal 0
    set result {}



















    return -code return $res 
}

proc ::fileutil::magic::rt::resultv {{msg {}}} {





    try result on return result {














	return $result
    }
}


# ### ### ### ######### ######### #########
## Public API, for use by a recognizer.


# emit a description 
proc ::fileutil::magic::rt::emit msg {
    variable found
    variable lfound
    variable level
    variable maxpstring
    variable extracted
    variable result
    variable weight
    variable weighttotal
    set found 1
    dict set lfound $level 1
    incr weighttotal $weight

    #set map [list \
    #    \\b "" \
    #    %c [apply {extracted {
    #        if {[catch {format %c $extracted} result]} {
    #    	return {}
    #        }




<


>
















|
|
|





<

>
|







|





<
|
>
|





<

>
>
|
>

>
>
>
>
|
|
>
>
>
>
>
>
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



>
>
>




>
>
>



















|

|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
|
<
<

<

<
<
<
|




>

>
|
>

>
>
>
>
>
>
>


>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>

|
>
>
>
>
>
>
>
>










|
|
>
|
|
|
<
<
<



<
|
<
<
<
<
<
|
<
<
|
<
<
<
|
<
<
<

<
<
<
<
<
<
<
<
<







>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
|
>
>
|
|
>
>
>
>
|
>
|
>
>
>
|
>
>
>
|
>
>
>
|
|
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>



>



<
<
<
<
|
|
|
<


<







1
2
3
4

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

48
49
50
51
52
53
54
55

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128











129


130

131



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200



201
202
203

204





205


206



207



208









209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317




318
319
320

321
322

323
324
325
326
327
328
329
# rtcore.tcl --
#
#	Runtime core for file type recognition engines written in pure Tcl.
#

# Copyright (c) 2004-2005 Colin McCormack <[email protected]>
# Copyright (c) 2005      Andreas Kupries <[email protected]>
# Copyright (c) 2016-2018 Poor Yorick     <[email protected]>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: rtcore.tcl,v 1.5 2005/09/28 04:51:19 andreas_kupries Exp $

#####
#
# "mime type recognition in pure tcl"
# http://wiki.tcl.tk/12526
#
# Tcl code harvested on:  10 Feb 2005, 04:06 GMT
# Wiki page last updated: ???
#
#####

# TODO
#    Required Functionality
#	implement full offset language}
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}

#
#
#	implement pstring (pascal string)
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}
#}
#
#	implement regex form
#	    done
#
#	    by pooryorick
#
#	    time {2016 06}

#
#
#	implement string qualifiers
#	    done
#	    
#	    by pooryorick
#
#	    time {2016 06}

#
#	implement correct handling of date types
#
#	finish implementing the indirect type} 
#	    done
#
#	    by pooryorick
#
#	    2018 08
#
#	Maybe distinguish between binary and text tests, like file(n)
#
#	    done
#
#	    by pooryorick
#
#	    2018 08
#	
#	process and use strength directives
#
#	    done
#
#	    by pooryorick
#
#	    2018 08
#
#	handle the "indirect" type
#
#	    done
#
#	    by pooryorick
#
#	    2018 08
#
#
#    }
#}




# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.5




# ### ### ### ######### ######### #########
## Implementation

namespace eval ::fileutil::magic::rt {
    # Configuration flag. (De)activate debugging output.
    # This is done during initialization.
    # Changes at runtime have no effect.

    variable debug 0

    # The maximum size of a substring to inspect from the file in question 
    variable maxstring 64

    # The maximum length of any %s substitution in a resulting description is
    variable maxpstring 64

    variable regexdefaultlen 4096

    # [*] The vast majority of magic strings are in the first 4k of the file.

    # Export APIs (full public, recognizer public)
    namespace export file_start result
    namespace export emit ext mime new offset strength \
	D Nv N O S Nvx Nx Sx L R I U < >











}










# ### ### ### ######### ######### #########
## Public API, general use.


proc ::fileutil::magic::rt::> {} {
    upvar #1 cursors cursors depth depth found found \
	level level lfound lfound offsets offsets strengths strengths \
	typematch typematch useful useful
    incr level
    incr depth
    set cursors($level) $cursors([expr {$level-1}])
    set strengths($level) 0
    set useful($level) 0
    set found 0
    dict set lfound $level 0
    return
}


proc ::fileutil::magic::rt::< {} {
    upvar #1 class class ext ext found found level level mime mime \
	result result strengths strengths typematch typematch useful useful

    if {$level == 1 && [llength $result]} {
	set leveln $level
	set weight 0
	while {$leveln >= 0} {
	    set weight [
		expr {$weight + $useful($leveln) + $strengths($leveln) + $typematch($leveln)}]
	    incr leveln -1
	}

	yield [list $weight $result $mime $ext]
	set result {}
    }

    # $useful holds weight of the match at each level, Each weight is
    # basically length of the match.
    set useful($level) 0
    set strengths($level) 0

    incr level -1

    if {$level == 0} {
	set ext {}
	set found 0
	set mime {}
	set depth 0
    }
}


proc ::fileutil::magic::rt::classify {data} {
    set bin_rx {[\x00-\x08\x0b\x0e-\x1f]}
    if {[regexp $bin_rx $data] } {
        return binary
    } else {
        return text
    }
}

proc ::fileutil::magic::rt::executable {} {
    upvar #1 finfo finfo
    if {![dict exists $finfo mode]} {
	return 0
    }
    expr {([dict get $finfo mode] & 0o111) > 0} 



}



proc ::fileutil::magic::rt::ext value {





    upvar #1 ext ext


    set ext [split $value /]



}














# mark the start of a magic file in debugging
proc ::fileutil::magic::rt::file_start {name} {
    ::fileutil::magic::rt::Debug {puts stderr "File: $name"}
}


proc ::fileutil::magic::rt::message msg {
    upvar #1 finfo finfo
    set ranges [regexp -all -inline -indices {\$\{([^\}]*)\}} $msg]
    foreach {orange irange} $ranges {
	lassign $irange first last
	set sub [string range $msg $first $last] 

	if {[regexp {^x\?([^:]*?):(.*)$} $sub -> tmsg fmsg]} {
	    set part [expr {[executable] ? $tmsg : $fmsg}]
	    set msg [string replace $msg[set line {}] {*}$orange $part]
	} else {
	    parseerror error [list {unrecognized variable in description}] 
	}
    }
    return $msg
}


proc ::fileutil::magic::rt::mime value {
    upvar #1 mime mime
    set mime [split [message $value] /]
}


# level #1 of a coroutine
proc ::fileutil::magic::rt::new {finfo chan named tests} {
    array set cache {}	    ; # Cache of fetched and decoded numeric
			    ; # values.

    ::fconfigure $chan -translation binary

    # fill the string cache
    set strbuf [::read $chan 4096]  ; # Input cache [*].
    set class [classify $strbuf]    ; # text or binary

    # clear the fetch cache
    catch {unset cache}
    array set cache {}

    set depth 0		; # depth of the current branch
    set ext {}
    set extracted {}    ; # The value extracted for inspection
    set found 1		; # Whether the last test produced a match
    set level 0
    set lfound {}	; # For each level, whether a match was found
    dict set lfound 0 1
    set mime {}
    set result {}	; # The accumulated recognition result that is
			; # in progress.

    array unset cursors	; # the offset just after the last matching bytes,
			; # per nesting level.

    set offsetstart 0	; # the offset for the offset.  Used to process
			; # "indirect" entries.

    array unset strengths ; #strengths at each level
    set strengths(0) 0
    set typematch(0) 0

    yield [info coroutine]
    if {[string length $strbuf] == 0} {
	yield [list 0 empty {} {}]
    } else {
	{*}$tests
    }
    rename [info coroutine] {}
    return -code break
}

proc ::fileutil::magic::rt::strength {expr} {
    upvar #1 level level strengths strengths
    upvar 0 strengths($level) strength
    # this expr must not be braced
    set strength [expr double($strength) $expr]
}

proc ::fileutil::magic::rt::use {named file name} {
    if [dict exists $named $file $name] {
	set script [dict get $named $file $name]
    } else {
	dict for {file1 val} $named {
	    if {[dict exists $val $name]} {
		set script [dict get $val $name]
		break
	    }
	}
    }
    if {![info exists script]} {
	return -code error [list {name not found} $file $name]
    }
    return $script
}



# ### ### ### ######### ######### #########
## Public API, for use by a recognizer.


# emit a description 
proc ::fileutil::magic::rt::emit msg {




    upvar #1 extracted extracted found found level level lfound lfound \
	result result
    variable maxpstring

    set found 1
    dict set lfound $level 1


    #set map [list \
    #    \\b "" \
    #    %c [apply {extracted {
    #        if {[catch {format %c $extracted} result]} {
    #    	return {}
    #        }
248
249
250
251
252
253
254



255
256
257
258
259
260
261
262
263









264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

279
280
281

282
283
284
285
286
287
288
289
290
291
292
293
294
295

296
297
298
299
300




301
302

303
304
305
306
307
308
309

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325




326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345



346
347
348
349
350


351
352
353
354
355
356
357
358
359
    for {set i 0} {$i < $count} {incr i} {
	lappend arguments $extracted2
    }
    catch {set msg [format $msg {*}$arguments]}

    # Assumption: [regexp] leaves $msg untouched if it fails
    regexp {\A(\b|\\b)?(.*)$} $msg match b msg



    if {$b ne {} && [llength $result]} {
	lset result end [lindex $result end]$msg
    } else {
	lappend result $msg
    }
    return
}

proc ::fileutil::magic::rt::Nv {type offset compinvert mod mand} {









    variable typemap
    variable extracted
    variable weight

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field from the file
    set extracted [Fetch $offset $size $scan]

    if {$compinvert && $extracted ne {}} {
	set extracted [expr ~$extracted]
    }
    if {$mod ne {} && $extracted ne {}} {
	# there's a mask to be applied

	set extracted [expr $extracted $mod $mand]
    }


    ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $mod: $extracted"}
    set weight [string length $extracted]
    return $extracted
}

proc ::fileutil::magic::rt::use {named file name} {
    if [dict exists $named $file $name] {
	set script [dict get $named $file $name]
    } else {
	dict for {file val} $named {
	    if {[dict exists $val $name]} {
		set script [dict get $val $name]
		break
	    }

	}
    }
    if {![info exists script]} {
	return -code error [list {name not found} $file $name]
    }




    return $script
}


# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::N {
    type offset testinvert compinvert mod mand comp val} {
    variable typemap
    variable extracted

    variable weight

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field
    set extracted [Fetch $offset $size $scan]
    if {$extracted eq {}} {

	# Rules like the following, from the jpeg file, imply that
	# in the absence of an extracted value, a numerical value of 
	# 0 should be used

	# From jpeg:
	    ## Next, show thumbnail info, if it exists:
	    #>>18    byte        !0      \b, thumbnail %dx




	set extracted 0
    }

    # Would moving this before the fetch an optimisation ? The
    # tradeoff is that we give up filling the cache, and it is unclear
    # how often that value would be used. -- Profile!
    if {$comp eq {x}} {
	set weight 0
	# anything matches - don't care
	if {$testinvert} {
	    return 0
	} else {
	    return 1
	}
    }

    if {[string match $scan *me]} {
	set data [me4 $data]
	set scan I 
    }



    # get value in binary form, then back to numeric
    # this avoids problems with sign, as both values are
    # [binary scan]-converted identically (see [treegen1])
    binary scan [binary format $scan $val] $scan val



    if {$compinvert && $extracted ne {}} {
	set extracted [expr ~$extracted]
    }

    # perform comparison
    if {$mod ne {}} {
	# there's a mask to be applied
	set extracted [expr $extracted $mod $mand]
    }







>
>
>








|
>
>
>
>
>
>
>
>
>

<
<
<
<
|

<
|

|
|

|
<
>
|


>
|
<
<
|

<
<
<
|
<
|
|
<
|
>
|
|
<
<
|
>
>
>
>
|

>





|
|
>
|















>
>
>
>



|



|












>
>
>
|
|
|
|
|
>
>

|







347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375




376
377

378
379
380
381
382
383

384
385
386
387
388
389


390
391



392

393
394

395
396
397
398


399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
    for {set i 0} {$i < $count} {incr i} {
	lappend arguments $extracted2
    }
    catch {set msg [format $msg {*}$arguments]}

    # Assumption: [regexp] leaves $msg untouched if it fails
    regexp {\A(\b|\\b)?(.*)$} $msg match b msg

    set msg [message $msg[set msg {}]]

    if {$b ne {} && [llength $result]} {
	lset result end [lindex $result end]$msg
    } else {
	lappend result $msg
    }
    return
}

proc ::fileutil::magic::rt::D offset {
    upvar #1 found found
    expr {!$found}
}

proc ::fileutil::magic::rt::I {offset it ioi ioo iir io} {
    # Handling of base locations specified indirectly through the
    # contents of the inspected file.
    upvar #1 level level offsets offsets
    set offsets($level) $offset
    variable typemap




    foreach {size scan} $typemap($it) break


    set offset [Fetch $offset $size $scan]

    if {[catch {expr {$offset + 0}}]} {
	return [expr {-1 * 2 ** 128}]
    }


    if {$ioi && ![catch {$offset + 0}]} {
	set offset [expr {~$offset}]
    }

    if {$iir} {
	set io [Fetch [expr {$offset + $io}] $size $scan]


    }




    if {$ioo ne {}} {

	# no bracing this expression
	set offset [expr $offset $ioo $io]

    }
    return $offset
}




proc ::fileutil::magic::rt::L newlevel {
    upvar #1 level level
    set level $newlevel
    # Regenerate level information in the calling context.
    return
}


# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::N {
    type offset testinvert compinvert mod mand comp val} {

    upvar #1 class class cursors cursors extracted extracted level level \
	typematch typematch useful useful
    variable typemap

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field
    set extracted [Fetch $offset $size $scan]
    if {$extracted eq {}} {

	# Rules like the following, from the jpeg file, imply that
	# in the absence of an extracted value, a numerical value of 
	# 0 should be used

	# From jpeg:
	    ## Next, show thumbnail info, if it exists:
	    #>>18    byte        !0      \b, thumbnail %dx
	#
	# pyk 2018-08-16:
	#    Not necessarily.  The failure to extract might cause the rule to
	#    be skipped.  Consider doing something different here.
	set extracted 0
    }

    # Would moving this before the fetch be an optimisation ? The
    # tradeoff is that we give up filling the cache, and it is unclear
    # how often that value would be used. -- Profile!
    if {$comp eq {x}} {
	set useful($level) 0
	# anything matches - don't care
	if {$testinvert} {
	    return 0
	} else {
	    return 1
	}
    }

    if {[string match $scan *me]} {
	set data [me4 $data]
	set scan I 
    }

    switch $scan {
	default {
	    # get value in binary form, then back to numeric
	    # this avoids problems with sign, as both values are
	    # [binary scan]-converted identically (see [treegen1])
	    binary scan [binary format $scan $val] $scan val
	}
    }

    if {$compinvert && $extracted ne {}} {
	set extracted [expr -$extracted]
    }

    # perform comparison
    if {$mod ne {}} {
	# there's a mask to be applied
	set extracted [expr $extracted $mod $mand]
    }
369
370
371
372
373
374
375
376






377
378
379
380
381
382
383
384
385
386
387
388

389


390

























391

392
393







394













395
396
397


398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417


418
419
420
421
422


423



424
425
426
427
428
429
430

431
432
433
434
435
436
437
438
439
440

441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459


460


461
462
463
464
465
466
467
468

469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

487
488

489
490
491
492
493
494
495
496
497
	}
	default {
	    #Should never reach this
	    return -code error [list {unknown comparison operator} $comp]
	}
    }
    # Do this last to minimize shimmering
    set weight [string length $extracted]







    ::fileutil::magic::rt::Debug {
	puts stderr "numeric $type: $val $t$comp $extracted / $mod - $c"
    }
    if {$testinvert} {
	set c [expr {!$c}]
	return $c 
    } else {
	return $c
    }
}


proc ::fileutil::magic::rt::S {type offset testinvert mod mand comp val} {


    variable cursor

























    variable extracted

    variable fd
    variable level







    variable lfound













    variable maxstring
    variable regexdefaultlen
    variable weight



    # $compinvert is currently ignored for strings

    set weight [string length $val]

    switch $type {
	pstring {
	    set ptype B
	    set vincluded 0
	    # The last pstring type specifier wins 
	    foreach item $mod {
		if {$item eq {J}} {
		    set vincluded 1
		} else {
		    set ptype $item
		}
	    }
	    lassign [dict get {B {b 1} H {S 2} h {s 2} L {I 4} l {i 4}} $ptype] scan slength
	    set length [GetString $offset $slength]
	    set offset $cursor 


	    binary scan $length ${scan}u length
	    if {$vincluded} {
		set length [expr {$length - $slength}]
	    }
	    set extracted [GetString $offset $length]


	    set c [Smatch $val $comp $extracted $mod]



	}
	regex {
	    if {$mand eq {}} {
		set mand $regexdefaultlen 
	    }
	    set extracted [GetString $offset $mand]
	    if {[regexp $val $extracted match]} {

		set weight [string length $match]
	        set c 1
	    } else {
	        set c 0
	    }
	}
	search {
	    set limit $mand
	    set extracted [GetString $offset $limit]
	    if {[string first $val $extracted] >= 0} {

		set weight [string length $val]
		set c 1
	    } else {
		set c 0
	    }
	} default {
	    # explicit "default" type, which is intended only to be used with
	    # the "x" pattern
	    set c [expr {[dict exists $lfound $level] ? ![dict get $lfound $level] : 1}]
	} default {
	    # get the string and compare it
	    switch $type bestring16 - lestring16 {
		set extracted [GetString $offset $maxstring]
		set extracted [string range $extracted 0 1]
		switch $type bestring16 {
		    binary scan $extracted Su extracted
		} lestring16 {
		    binary scan $extracted Su extracted
		}


		set extracted [format %c $extracted]


	    } default {
		# If $val is 0, give [emit] something to work with .
		if {$val eq  "\0"} {
		    set extracted [GetString $offset $maxstring]
		} else {
		    set extracted [GetString $offset [string length $val]]
		}
	    }

	    set c [Smatch $val $comp $extracted $mod]
	}
    }


    ::fileutil::magic::rt::Debug {
	puts "String '$val' $comp '$extracted' - $c"
	if {$c} {
	    puts "offset $offset - $extracted"
	}
    }
    if {$testinvert} {
	return [expr {!$c}]
    } else {
	return $c
    }
}


proc ::fileutil::magic::rt::Smatch {val op string mod} {
    variable weight

    if {$op eq {x}} {
	set weight 0
	return 1
    }

    if {![string length $string] && $op in {eq == < <=}} {
	if {$op in {eq == < <=}} {
	    # Nothing matches an empty $string.
	    return 0







|
>
>
>
>
>
>












>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>


|
>
>



<
<














|
>
>
|
|
|
|
|
>
>
|
>
>
>






|
>
|








|
>
|











|
|

|

|

>
>
|
>
>








>


















>

<
>

|







483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571


572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671

672
673
674
675
676
677
678
679
680
681
	}
	default {
	    #Should never reach this
	    return -code error [list {unknown comparison operator} $comp]
	}
    }
    # Do this last to minimize shimmering
    set useful($level) [string length $extracted]

    if {$class eq {binary}} {
	set typematch($level)  1
    } else {
	set typematch($level)  0
    }

    ::fileutil::magic::rt::Debug {
	puts stderr "numeric $type: $val $t$comp $extracted / $mod - $c"
    }
    if {$testinvert} {
	set c [expr {!$c}]
	return $c 
    } else {
	return $c
    }
}


proc ::fileutil::magic::rt::Nv {type offset compinvert mod mand} {
    upvar #1 class class cursors cursors extracted extracted level level \
	offsets offsets useful useful
    variable typemap

    set offsets($level) $offset

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break

    # fetch the numeric field from the file
    set extracted [Fetch $offset $size $scan]

    if {$compinvert && $extracted ne {}} {
	set extracted [expr ~$extracted]
    }
    if {$mod ne {} && $extracted ne {}} {
	# there's a mask to be applied
	set extracted [expr $extracted $mod $mand]
    }

    if {$class eq {binary}} {
	set typematch($level)  1
    } else {
	set typematch($level)  0
    }

    ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $mod: $extracted"}
    set useful($level) [string length $extracted]
    return $extracted
}


proc ::fileutil::magic::rt::O offset {
    # Handling of offset locations specified relative to the offset
    # last field one level up.
    upvar #1 offsets offsets level level
    upvar 0 offsets([expr {$level -1}]) base
    return [expr {$base + $offset}]
}


proc ::fileutil::magic::rt::R offset {
    # Handling of offset locations specified relative to the cursor one level
    # up.
    upvar #1 cursors cursors level level
    upvar 0 cursors([expr {$level -1}]) cursor
    return [expr {$cursor + $offset}]
}


proc ::fileutil::magic::rt::S {type offset testinvert mod mand comp val} {
    upvar #1 cursors cursors extracted extracted level level \
	lfound lfound useful useful
    variable maxstring
    variable regexdefaultlen

    upvar 0 cursors($level) cursor useful($level) used
    set cursor $offset

    # $compinvert is currently ignored for strings



    switch $type {
	pstring {
	    set ptype B
	    set vincluded 0
	    # The last pstring type specifier wins 
	    foreach item $mod {
		if {$item eq {J}} {
		    set vincluded 1
		} else {
		    set ptype $item
		}
	    }
	    lassign [dict get {B {b 1} H {S 2} h {s 2} L {I 4} l {i 4}} $ptype] scan slength
	    set length [GetString $offset $slength]
	    incr offset $slength
	    incr cursor $slength
	    set scanu ${scan}u
	    if {[binary scan $length $scanu length2]} {
		if {$vincluded} {
		    set length2 [expr {$length2 - $slength}]
		}
		set extracted [GetString $offset $length2]
		incr cursor [string length $extracted]
		    array get cursors]]
		set c [Smatch $val $comp $extracted $mod]
	    } else {
		set c 0
	    }
	}
	regex {
	    if {$mand eq {}} {
		set mand $regexdefaultlen 
	    }
	    set extracted [GetString $offset $mand]
	    if {[regexp -indices $val $extracted match indices]} {
		incr cursor [lindex $indices 1]
		set used [string length $match]
	        set c 1
	    } else {
	        set c 0
	    }
	}
	search {
	    set limit $mand
	    set extracted [GetString $offset $limit]
	    if {[set offset2 [string first $val $extracted]] >= 0} {
		set cursor [expr {$offset + $offset2 + [string length $val]}]
		set used [string length $val]
		set c 1
	    } else {
		set c 0
	    }
	} default {
	    # explicit "default" type, which is intended only to be used with
	    # the "x" pattern
	    set c [expr {[dict exists $lfound $level] ? ![dict get $lfound $level] : 1}]
	} default {
	    # get the string and compare it
	    switch $type bestring16 - lestring16 {
		set extracted [GetString $offset [
		    expr {2 * [string length $val]}]]
		switch $type bestring16 {
		    binary scan $extracted Su* extracted
		} lestring16 {
		    binary scan $extracted su* extracted
		}

		foreach ordinal $extracted[set extracted {}] {
		    append extracted [format %c $ordinal]
		}

	    } default {
		# If $val is 0, give [emit] something to work with .
		if {$val eq  "\0"} {
		    set extracted [GetString $offset $maxstring]
		} else {
		    set extracted [GetString $offset [string length $val]]
		}
	    }
	    incr cursor [string length $extracted]
	    set c [Smatch $val $comp $extracted $mod]
	}
    }


    ::fileutil::magic::rt::Debug {
	puts "String '$val' $comp '$extracted' - $c"
	if {$c} {
	    puts "offset $offset - $extracted"
	}
    }
    if {$testinvert} {
	return [expr {!$c}]
    } else {
	return $c
    }
}


proc ::fileutil::magic::rt::Smatch {val op string mod} {

    upvar #1 class class level level typematch typematch useful useful 
    if {$op eq {x}} {
	set useful($level) 0
	return 1
    }

    if {![string length $string] && $op in {eq == < <=}} {
	if {$op in {eq == < <=}} {
	    # Nothing matches an empty $string.
	    return 0
531
532
533
534
535
536
537






538
539
540
541
542
543
544
    }


    if {{T} in $mod} {
	set string [string trim $string[set string {}]]
	set val [string tolower $val[set val {}]]
    }







    set string [string range $string  0 [string length $val]-1]

    # The remaining code may assume that $string and $val have the same length
    # .

    set opnum [dict get {< -1 == 0 eq 0 != 0 ne 0 > 1} $op]







>
>
>
>
>
>







715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
    }


    if {{T} in $mod} {
	set string [string trim $string[set string {}]]
	set val [string tolower $val[set val {}]]
    }

    if {$class eq {binary} || {b} in $mod} {
	set typematch($level)  0
    } else {
	set typematch($level)  1
    }

    set string [string range $string  0 [string length $val]-1]

    # The remaining code may assume that $string and $val have the same length
    # .

    set opnum [dict get {< -1 == 0 eq 0 != 0 ne 0 > 1} $op]
571
572
573
574
575
576
577


578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679

680
681
682
683
684
685
686
687
688
689
690
691
692
693
694


695
696
697
698
699
700
701
702
703
704
705














706
707




708
709
710








711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774

775
776
777
778
779
780
781

782



783


784
785
786
787
788
789
790
791
792
793
794
795
796


797








798




799
800
801
802
803



804

805
806
807
808
809
810
811
812



813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843



844
845
846
847

848
	}
    } else {
	set res [expr {[::string compare $string $val] == $opnum}]
    }
    if {$op in {!= ne}} {
	set res [expr {!$res}]
    }


    set weight [string length $val]
    return $res
}

proc ::fileutil::magic::rt::Nvx {type offset compinvert mod mand} {
    variable typemap
    variable extracted
    variable last
    variable weight
    variable level

    # unpack the type characteristics
    foreach {size scan} $typemap($type) break
    set last($level) [expr {$offset + $size}]

    set extracted [Nv $type $offset $compinvert $mod $mand]

    ::fileutil::magic::rt::Debug {puts stderr "NVx $type $offset $extracted $mod $mand"}
    return $extracted
}

# Numeric - get bytes of $type at $offset and $compare to $val
# qual might be a mask
proc ::fileutil::magic::rt::Nx {
    type offset testinvert compinvert mod mand comp val} {

    variable cursor
    variable typemap
    variable extracted
    variable last
    variable level
    variable weight

    set res [N $type $offset $testinvert $compinvert $mod $mand $comp $val]

    ::fileutil::magic::rt::Debug {
	puts stderr "Nx numeric $type: $val $comp $extracted / $qual - $c"
    }
    set last($level) $cursor
    return $res
}

proc ::fileutil::magic::rt::Sx {
    type offset testinvert mod mand comp val} {
    variable cursor
    variable extracted
    variable fd
    variable last
    variable level
    variable weight

    set res [S $type $offset $testinvert $mod $mand $comp $val]
    set last($level) $cursor
    return $res
}
proc ::fileutil::magic::rt::L {newlevel} {
    variable level $newlevel
    # Regenerate level information in the calling context.
    return
}

proc ::fileutil::magic::rt::I {offset it ioi ioo iir io} {
    # Handling of base locations specified indirectly through the
    # contents of the inspected file.
    variable typemap
    foreach {size scan} $typemap($it) break
    if {$iir} {
	# To do:  this can't be right.
	set io [Fetch [expr $offset + $io] $size $scan]
    }
    set data [Fetch $offset $size $scan]

    if {$ioi && [string is double -strict $data]} {
	set data [expr {~$data}]
    }
    if {$ioo ne {} && [string is double -strict $data]} {
	set data [expr $data $ioo $io]
    }
    if {![string is double -strict $data]} {
	set data -1
    }
    return $data
}

proc ::fileutil::magic::rt::R base {
    # Handling of base locations specified relative to the end of the
    # last field one level above.

    variable last   ; # Remembered locations.
    variable level  ; # The level to get data from.
    return [expr {$last([expr {$level-1}]) + $base}]
}


proc ::fileutil::magic::rt::U {file name} {
    upvar named named
    set script [use $named $file $name]
    tailcall ::try $script
}

# ### ### ### ######### ######### #########
## Internal. Retrieval of the data used in comparisons.


# fetch and cache a numeric value from the file
proc ::fileutil::magic::rt::Fetch {where what scan} {
    variable cache
    variable cursor
    variable extracted
    variable strbuf
    variable fd

    # Avoid [seek] errors
    if {$where < 0} {
	set where 0
    }
    # {to do} id3 length
    if {![info exists cache($where,$what,$scan)]} {


	::seek $fd $where
	set data [::read $fd $what]
	incr cursor [string length $data]
	set extracted [rtscan $data $scan]
	set cache($where,$what,$scan) [list $extracted $cursor]

	# Optimization: If we got 4 bytes, i.e. long we implicitly
	# know the short and byte data as well. Should put them into
	# the cache. -- Profile: How often does such an overlap truly
	# happen ?















    } else {
	lassign $cache($where,$what,$scan) extracted cursor




    }
    return $extracted
}









proc ::fileutil::magic::rt::rtscan {data scan} {
    if {$scan eq {me}} {
	set data [me4 $data]
	set scan I 
    }
    set numeric {}
    binary scan $data $scan numeric
    return $numeric
}

proc ::fileutil::magic::rt::me4 data {
	binary scan $data a4 chars
	set data [binary format a4 [lindex $chars 1] [
	lindex $chars 0] [lindex $chars 3] [lindex $chars 2]]
}

proc ::fileutil::magic::rt::GetString {offset len} {
    variable cursor
    # We have the first 1k of the file cached
    variable strbuf
    variable fd

    set end [expr {$offset + $len - 1}]
    if {$end < 4096} {
	# in the string cache, copy the requested part.
	set string [::string range $strbuf $offset $end]
    } else {
	# an unusual one, move to the offset and read directly from
	# the file.
	::seek $fd $offset
	set string [::read $fd $len]
    }
    set cursor [expr {$offset + [string length $string]}]
    return $string
}

# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::rt::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,
    # nor is anything done. So neither results, nor errors are
    # possible, a true no-operation.
    proc ::fileutil::magic::rt::Debug {args} {}

} else {
    proc ::fileutil::magic::rt::Debug {script} {
	# Run the commands in the debug script. This usually generates
	# some output. The uplevel is required to ensure the proper
	# resolution of all variables found in the script.
	uplevel 1 $script
	return
    }
}

# ### ### ### ######### ######### #########
## Initialize constants

namespace eval ::fileutil::magic::rt {
    # maps magic typenames to field characteristics: size (#byte),
    # binary scan format


    variable typemap
}

proc ::fileutil::magic::rt::Init {} {
    variable typemap
    global tcl_platform


    # Set the definitions for all types which have their endianess



    # explicitly specified n their name.



    array set typemap {
	byte    {1 c}
	beshort {2 S}
	leshort {2 s}
	bedouble {8 Q}
	belong  {4 I}
	lelong  {4 i}
	bedate  {4 S}  ledate   {4 s}
	beldate {4 I}  leldate  {4 i}
	bedouble {8 Q}
	beqdate {8 W}
	beqldate {8 W}


	bequad {8 W} 








	ledouble {8 q}




	leqdate {8 w}
	leqldate {8 w}
	lequad {8 w}
	lequad {8 w} 
	leqwdate {8 w}



	medate  {4 me}

	melong  {4 me}
	meldate  {4 me}
	lestring16 {2 s}
	bestring16 {2 S}

	long  {4 Q} date  {4 Q} ldate {4 Q}
	short {2 Y} quad {8 W} 
    }




    # Now set the definitions for the types without explicit
    # endianess. They assume/use 'native' byteorder. We also put in
    # special forms for the compiler, so that it can use short names
    # for the native-endian types as well.

    # generate short form names
    foreach {n v} [array get typemap] {
	foreach {len scan} $v break
	#puts stderr "Adding $scan - [list $len $scan]"
	set typemap($scan) [list $len $scan]
    }

    # The special Q and Y short forms are incorrect, correct now to
    # use the proper native endianess.

    # {to do} {Is ldate done correctly in the procedure?  What is its byte
    # order anyway?  Native?}

    if {$tcl_platform(byteOrder) eq "littleEndian"} {
	array set typemap {Q {4 i} Y {2 s}
	    short {2 s} long {4 i} quad {8 w}
	}
    } else {
	array set typemap {Q {4 I} Y {2 S}
	    short {2 S} long {4 I} quad {8 W}
	}
    }
}

::fileutil::magic::rt::Init



# ### ### ### ######### ######### #########
## Ready for use.

package provide fileutil::magic::rt 2.0

# EOF







>
>
|



<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
<
<
<
<
<
<
|
|
|
<
<
<
|
<

<
<
<
<
<
<
<
<
<

<
<
<
|
|
|
|
<
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
<
<
<



<
<
<
<
<



>



|
|
|
<
|






|
>
>
|
|
|







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
>
>
>
>

|

>
>
>
>
>
>
>
>











<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<





















<
<

<
<
<

>
|
|





>
|
>
>
>
|
>
>
|
|
|
<
<

|
|
<
|
|


>
>

>
>
>
>
>
>
>
>

>
>
>
>



<

>
>
>

>

|
|
|
|
|
|
|
>
>
>
|
<
<
<
<




<



|
<

<
<
<
|
|
<
<

|
<
<




>
>
>




>

761
762
763
764
765
766
767
768
769
770
771
772
773






774












775
776
777






778
779
780



781

782









783



784
785
786
787


788










789











790



791



792
793
794





795
796
797
798
799
800
801
802
803
804

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840

841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866




867




















868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888


889



890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909


910
911
912

913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935

936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953




954
955
956
957

958
959
960
961

962



963
964


965
966


967
968
969
970
971
972
973
974
975
976
977
978
979
	}
    } else {
	set res [expr {[::string compare $string $val] == $opnum}]
    }
    if {$op in {!= ne}} {
	set res [expr {!$res}]
    }
    # use the extracted value here, not val, because in the case of
    # inequalities the extra information has weight
    set useful($level) [string length $string]
    return $res
}




















proc ::fileutil::magic::rt::T {val op string mod} {
    upvar #1 offsetstart offsetstart
    set saved $offsetstart






    >
    {*}$tests
    <



    set offsetstart $saved

}














proc ::fileutil::magic::rt::U {file name offset} {
    upvar #1 level level named named offsets offsets
    set offsets($level) $offset


    set script [use $named $file $name]










    >











    ::try $script



    <



}








# ### ### ### ######### ######### #########
## Internal. Retrieval of the data used in comparisons.


# fetch and cache a numeric value from the file
proc ::fileutil::magic::rt::Fetch {where what scan} {
    upvar #1 cache cache chan chan cursors cursors extracted extracted \
	level level offsets offsets strbuf strbuf


    set offsets($level) $where 

    # Avoid [seek] errors
    if {$where < 0} {
	set where 0
    }
    # {to do} id3 length
    if {[info exists cache($where,$what,$scan)]} {
	lassign $cache($where,$what,$scan) extracted cursor
    } else {
	::seek $chan $where
	set data [::read $chan $what]
	set cursor [expr {$where + [string length $data]}]
	set extracted [rtscan $data $scan]
	set cache($where,$what,$scan) [list $extracted $cursor]

	# Optimization: If we got 4 bytes, i.e. long we implicitly
	# know the short and byte data as well. Should put them into
	# the cache. -- Profile: How often does such an overlap truly
	# happen ?
    }
    set cursors($level) $cursor 
    return $extracted
}


proc ::fileutil::magic::rt::GetString {offset len} {
    upvar #1 chan chan level level strbuf strbuf offsets offsets
    # We have the first 1k of the file cached

    set offsets($level) $offset
    set end [expr {$offset + $len - 1}]
    if {$end < [string length $strbuf]} {
        # in the string cache, copy the requested part.
        set string [::string range $strbuf $offset $end]
    } else {

	# an unusual one, move to the offset and read directly from
	# the file.
	::seek $chan $offset
	set string [::read $chan $len]
    }
    return $string
}


proc ::fileutil::magic::rt::me4 data {
	binary scan $data a4 chars
	set data [binary format a4 [lindex $chars 1] [
	lindex $chars 0] [lindex $chars 3] [lindex $chars 2]]
}


proc ::fileutil::magic::rt::rtscan {data scan} {
    if {$scan eq {me}} {
	set data [me4 $data]
	set scan I 
    }
    set numeric {}
    binary scan $data $scan numeric
    return $numeric
}



























# ### ### ### ######### ######### #########
## Internal, debugging.

if {!$::fileutil::magic::rt::debug} {
    # This procedure definition is optimized out of using code by the
    # core bcc. It knows that neither argument checks are required,
    # nor is anything done. So neither results, nor errors are
    # possible, a true no-operation.
    proc ::fileutil::magic::rt::Debug {args} {}

} else {
    proc ::fileutil::magic::rt::Debug {script} {
	# Run the commands in the debug script. This usually generates
	# some output. The uplevel is required to ensure the proper
	# resolution of all variables found in the script.
	uplevel 1 $script
	return
    }
}








# ### ### ### ######### ######### #########
## Initializ package


proc ::fileutil::magic::rt::Init {} {
    variable typemap
    global tcl_platform

    # map magic typenames to field characteristics: size (#byte),

    # Types without explicit endianess assume/use 'native' byteorder.
    # We also put in special forms for the compiler, so that it can use short
    # names for the native-endian types as well.

    # {to do} {Is ldate done correctly in the procedure?  What is its byte
    # order anyway?  Native?}
    
    foreach {type sig} {
	bedate  {4 S}


	bedouble {8 Q}
	befloat {4 R}
	beid3 {4 n}

	beldate {4 I}
	belong  {4 I}
	beqdate {8 W}
	beqldate {8 W}
	beqwdate {8 W}
	beqldate {8 W}
	bequad {8 W} 
	beshort {2 S}
	bestring16 {2 S}
	byte    {1 c}
	date {4 n}
	double {8 d}
	float {4 f}
	ldate {4 n}
	ledate   {4 n}
	ledouble {8 q}
	leid3 {4 nu}
	lefloat {4 f}
	leldate  {4 i}
	lelong  {4 i}
	leqdate {8 w}
	leqldate {8 w}
	lequad {8 w}

	leqwdate {8 w}
	leshort {2 s}
	lestring16 {2 s}
	long  {4 n}
	medate  {4 me}
	meldate  {4 me}
	melong  {4 me}
	qdate {8 m}
	qdate {8 n}
	qldata {8 m}
	quad {8 m} 
	qwdate {8 m}
	short {2 t}
    } {
	set typemap($type) $sig
	lassign $sig size scan
	set typemap(u$type) [list $size ${scan}u]
    }





    # generate short form names
    foreach {n v} [array get typemap] {
	foreach {len scan} $v break

	set typemap($scan) [list $len $scan]
    }

    # Add the special Q and Y short forms using the proper native endianess.





    if {$tcl_platform(byteOrder) eq {littleEndian}} {
	array set typemap {Q {4 i} Y {2 s} quad {8 w}}


    } else {
	array set typemap {Q {4 I} Y {2 S} quad {8 W}}


    }
}

::fileutil::magic::rt::Init



# ### ### ### ######### ######### #########
## Ready for use.

package provide fileutil::magic::rt 2.0

# EOF

Changes to modules/fumagic/tmc.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# (-)	Compilation of one or more files in magic(5) syntax into a
#	list of recognizers performing all the checks and mappings
#	encoded in them.
# 
# Command syntax
# --------------
# 
# Ad 1)	tmc namespace magic-file ?magic-file...?
#
#	Compile all magic files list of recognizers, generate a script which
#	assigns the recognizers to $namespace::tests and $namespace::named and
#	write the script to stdout.
# 
# Ad 2)	tmc -merge tclfile namespace magic-file ?magic-file...?
#
#	Same as (1), but does not write to stdout. Instead the part of
#	the 'tclfile' delineated by marker lines containing "BEGIN
#	GENERATED CODE" and "END GENERATED CODE" is replaced with the
#	generated code.

package require Tcl 8.5
set auto_path [linsert $auto_path 0 [file dirname [file normalize [info script]]]] ; # This directory
set auto_path [linsert $auto_path 0 [file dirname [lindex $auto_path end]]]]        ; # and the one above
#puts *\t[join $auto_path \n*\t]
package require fileutil::magic::cfront

# ### ### ### ######### ######### #########
## Internal data and status

namespace eval ::tmc {







|


|


|








|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# (-)	Compilation of one or more files in magic(5) syntax into a
#	list of recognizers performing all the checks and mappings
#	encoded in them.
# 
# Command syntax
# --------------
# 
# Ad 1)	tmc magic-file ?magic-file...?
#
#	Compile all magic files list of recognizers, generate a script which
#	assigns the recognizers to $tests and $named and
#	write the script to stdout.
# 
# Ad 2)	tmc -merge tclfile magic-file ?magic-file...?
#
#	Same as (1), but does not write to stdout. Instead the part of
#	the 'tclfile' delineated by marker lines containing "BEGIN
#	GENERATED CODE" and "END GENERATED CODE" is replaced with the
#	generated code.

package require Tcl 8.5
set auto_path [linsert $auto_path 0 [file dirname [file normalize [info script]]]] ; # This directory
set auto_path [linsert $auto_path 0 [file dirname [lindex $auto_path end]]]        ; # and the one above
#puts *\t[join $auto_path \n*\t]
package require fileutil::magic::cfront

# ### ### ### ######### ######### #########
## Internal data and status

namespace eval ::tmc {
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
##

proc ::tmc::processCmdline {} {
    global argv

    variable output
    variable magic
    variable namespace

    set output ""
    set magic  {}
    set namespace ""

    # Process the options, perform basic validation.

    while {[llength $argv]} {
	set opt [lindex $argv 0]
	if {![string match "-*" $opt]} break
	if {$opt eq "-merge"} {
	    if {[llength $argv] < 2} Usage
	    set output [lindex $argv 1]
	    set argv   [lrange $argv 2 end]
	} else {
	    Usage
	}
    }

    # Additional validation, and extraction of the non-option
    # arguments.

    if {[llength $argv] < 2} Usage

    set namespace  [lindex $argv 0]
    set magic [lrange $argv 1 end]

    # Final validation across the whole configuration.

    if {$namespace eq ""} {
	ArgError "Illegal empty namespace name"
    }
    foreach m $magic {
	CheckInput $m {Magic file}
    }
    if {$output ne ""} {
	CheckTheMerge
    }
    return







<



<


















|

<




<
<
<







73
74
75
76
77
78
79

80
81
82

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

103
104
105
106



107
108
109
110
111
112
113
##

proc ::tmc::processCmdline {} {
    global argv

    variable output
    variable magic


    set output ""
    set magic  {}


    # Process the options, perform basic validation.

    while {[llength $argv]} {
	set opt [lindex $argv 0]
	if {![string match "-*" $opt]} break
	if {$opt eq "-merge"} {
	    if {[llength $argv] < 2} Usage
	    set output [lindex $argv 1]
	    set argv   [lrange $argv 2 end]
	} else {
	    Usage
	}
    }

    # Additional validation, and extraction of the non-option
    # arguments.

    if {[llength $argv] < 1} Usage


    set magic [lrange $argv 1 end]

    # Final validation across the whole configuration.




    foreach m $magic {
	CheckInput $m {Magic file}
    }
    if {$output ne ""} {
	CheckTheMerge
    }
    return
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Both write their messages to stderr and then
# exit the application with status 1.
##

proc ::tmc::Usage {} {
    global argv0
    puts stderr "$argv0 wrong#args, expected:\
	    ?-merge iofile? namespace magic magic..."
    exit 1
}

proc ::tmc::ArgError {text} {
    global argv0
    puts stderr "$argv0: $text"
    exit 1







|







122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Both write their messages to stderr and then
# exit the application with status 1.
##

proc ::tmc::Usage {} {
    global argv0
    puts stderr "$argv0 wrong#args, expected:\
	    ?-merge iofile? magic magic..."
    exit 1
}

proc ::tmc::ArgError {text} {
    global argv0
    puts stderr "$argv0: $text"
    exit 1
181
182
183
184
185
186
187




188
189

190
191
192
193
194
195
196
## Helper commands. File reading and writing.

proc ::tmc::Get {f} {
    return [read [set in [open $f r]]][close $in]
}

proc ::tmc::Write {f data} {




    puts -nonewline [set out [open $f w]] $data
    close $out

    return
}

# ### ### ### ######### ######### #########
## Configuation phase, validate command line.

::tmc::processCmdline







>
>
>
>
|

>







175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
## Helper commands. File reading and writing.

proc ::tmc::Get {f} {
    return [read [set in [open $f r]]][close $in]
}

proc ::tmc::Write {f data} {
    while 1 {
	set tmp $f.tmc_[incr i]
	if {![file exists $tmp]} break 
    }
    puts -nonewline [set out [open $tmp w]] $data
    close $out
    file rename -force $tmp $f
    return
}

# ### ### ### ######### ######### #########
## Configuation phase, validate command line.

::tmc::processCmdline
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
}

# ### ### ### ######### ######### #########
## Invoking the functionality.

if {[catch {
    # Read and process all input files.
    # Generate commands into a namespace.
    # Write the result either to stdout, or merge
    # into the specified output file.

    set tcl [eval [linsert $tmc::magic 0 \
	    fileutil::magic::cfront::generate \
	    $tmc::namespace]]

    if {$tmc::output eq ""} {
	puts stdout $tcl
    } else {
	::tmc::Merge $tmc::output \n${tcl}\n
    }
} msg]} {
    puts $::errorInfo
    ::tmc::ArgError $msg
}

# ### ### ### ######### ######### #########
exit







<




|
<

|











222
223
224
225
226
227
228

229
230
231
232
233

234
235
236
237
238
239
240
241
242
243
244
245
246
}

# ### ### ### ######### ######### #########
## Invoking the functionality.

if {[catch {
    # Read and process all input files.

    # Write the result either to stdout, or merge
    # into the specified output file.

    set tcl [eval [linsert $tmc::magic 0 \
	    fileutil::magic::cfront::generate compressed 0 --]]


    if {$tmc::output eq {}} {
	puts stdout $tcl
    } else {
	::tmc::Merge $tmc::output \n${tcl}\n
    }
} msg]} {
    puts $::errorInfo
    ::tmc::ArgError $msg
}

# ### ### ### ######### ######### #########
exit

Changes to modules/mime/mime.tcl.

662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
		incr state(lines.current)
		set x [string length $line]
		if {$x == 0} {set blankP 1}
	    }
        }

         if {!$blankP && [string match *\r $line]} {
             set line [string range $line 0 [expr {$x - 2}]]
             if {$x == 1} {
                 set blankP 1
             }
         }

        if {!$blankP && (
	    [string first { } $line] == 0







|







662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
		incr state(lines.current)
		set x [string length $line]
		if {$x == 0} {set blankP 1}
	    }
        }

         if {!$blankP && [string match *\r $line]} {
             set line [string range $line 0 $x-2]]
             if {$x == 1} {
                 set blankP 1
             }
         }

        if {!$blankP && (
	    [string first { } $line] == 0