Tcl Library Source Code

me_cpucore.tcl at [3df60e7e55]
Login

File modules/grammar_me/me_cpucore.tcl artifact 6d49b9a516 part of check-in 3df60e7e55


# -*- tcl -*-
# (C) 2005-2006 Andreas Kupries <[email protected]>
# ### ### ### ######### ######### #########
## Package description

## Implementation of ME virtual machines based on state values
## manipulated by the commands according to the match
## instructions. Allows for implementation in C.

# ### ### ### ######### ######### #########
## Requisites

namespace eval ::grammar::me::cpu::core {}

# ### ### ### ######### ######### #########
## Implementation, API. Ensemble command.

proc ::grammar::me::cpu::core {cmd args} {
    # Dispatcher for the ensemble command.
    variable core::cmds
    return [uplevel 1 [linsert $args 0 $cmds($cmd)]]
}

namespace eval grammar::me::cpu::core {
    variable cmds

    # Mapping from cmd names to procedures for quick dispatch. The
    # objects will shimmer into resolved command references.

    array set cmds {
	disasm ::grammar::me::cpu::core::disasm
	asm    ::grammar::me::cpu::core::asm
	new    ::grammar::me::cpu::core::new
	lc     ::grammar::me::cpu::core::lc
	tok    ::grammar::me::cpu::core::tok
	pc     ::grammar::me::cpu::core::pc
	iseof  ::grammar::me::cpu::core::iseof
	at     ::grammar::me::cpu::core::at
	cc     ::grammar::me::cpu::core::cc
	sv     ::grammar::me::cpu::core::sv
	ok     ::grammar::me::cpu::core::ok
	error  ::grammar::me::cpu::core::error
	lstk   ::grammar::me::cpu::core::lstk
	astk   ::grammar::me::cpu::core::astk
	mstk   ::grammar::me::cpu::core::mstk
	estk   ::grammar::me::cpu::core::estk
	rstk   ::grammar::me::cpu::core::rstk
	nc     ::grammar::me::cpu::core::nc
	ast    ::grammar::me::cpu::core::ast
	halted ::grammar::me::cpu::core::halted
	code   ::grammar::me::cpu::core::code
	eof    ::grammar::me::cpu::core::eof
	put    ::grammar::me::cpu::core::put
	run    ::grammar::me::cpu::core::run
    }
}

# ### ### ### ######### ######### #########
## Ensemble implementation

proc ::grammar::me::cpu::core::disasm {code} {
    variable iname
    variable tclass
    variable anum

    Validate $code ord dst jmp

    set label 0
    foreach k [array names jmp] {
	set jmp($k) bra$label
	incr label
    }
    foreach k [array names dst] {
	if {![info exists jmp($k)]} {
	    set jmp($k) {}
	}
    }

    set result {}
    foreach {asm pool tokmap} $code break

    set pc    0
    set pcend [llength $asm]

    while {$pc < $pcend} {
	set base $pc
	set insn [lindex $asm $pc] ; incr pc
	set an   [lindex $anum $insn]

	if {$an == 1} {
	    set a [lindex $asm $pc] ; incr pc
	} elseif {$an == 2} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	} elseif {$an == 3} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	    set c [lindex $asm $pc] ; incr pc
	}

	set     instruction {}
	lappend instruction $jmp($base)
	lappend instruction $iname($insn)

	switch -exact $insn {
	    0 - 5 - 20 - 24 - 25 - 26 -
	    a/string {
		lappend instruction [lindex $pool $a]
	    }
	    1 {
		# a/tok b/string
		if {![llength $tokmap]} {
		    lappend instruction [lindex $pool $a]
		} else {
		    lappend instruction ${a}:$ord($a)
		}
		lappend instruction [lindex $pool $b]
	    }
	    2 {
		# a/tokstart b/tokend c/string
		if {![llength $tokmap]} {
		    lappend instruction [lindex $pool $a]
		    lappend instruction [lindex $pool $b]
		} else {
		    # tokmap defined: a = b = order rank.
		    lappend instruction ${a}:$ord($a)
		    lappend instruction ${b}:$ord($b)
		}
		lappend instruction [lindex $pool $c]
	    }
	    3 {
		# a/class(0-5) b/string
		lappend instruction [lindex $tclass $a]
		lappend instruction [lindex $pool $b]
	    }
	    4 {
		# a/branch b/string
		lappend instruction $jmp($a)
		lappend instruction [lindex $pool $b]
	    }
	    6 - 11 - 12 - 13 -
	    a/branch {
		lappend instruction $jmp($a)
	    }
	    default {}
	}

	lappend result $instruction
    }

    return $result
}

proc ::grammar::me::cpu::core::asm {code} {
    variable iname
    variable anum
    variable tccode

    # code = list(insn), insn = list (label insn-name ...)

    # I. Indices for the labels, based on instruction sizes.

    array set jmp {}
    set off 0
    foreach insn $code {
	foreach {label name} $insn break
	# Ignore embedded comments, except for labels
	if {$label ne ""} {
	    set jmp($label) $off
	}
	if {$name eq ".C"} continue
	if {![info exists iname($name)]} {
	    return -code error "Bad instruction \"$insn\", unknown command \"$name\""
	}
	set an [lindex $anum $iname($name)]
	if {[llength $insn] != ($an+2)} {
	    return -code error "Bad instruction \"$insn\", expected $an argument[expr {$an == 1 ? "" : "s"}]"
	}
	incr off
	incr off [lindex $anum $iname($name)]
    }

    set asm          {}
    set pool         {}
    array set poolh  {}
    array set tokmap {}
    array set ord    {}
    set plain        0

    foreach insn $code {
	foreach {label name} $insn break
	# Ignore embedded comments
	if {$name eq ".C"} continue
	set an [lindex $anum $iname($name)]

	# Instruction code to assembly ...
	lappend asm $iname($name)

	# Encode arguments ...
	switch -exact -- $name {
	    ict_advance            -
	    inc_save               -
	    ier_nonterminal        -
	    isv_nonterminal_leaf   -
	    isv_nonterminal_range  -
	    isv_nonterminal_reduce {
		lappend asm [Str [lindex $insn 2]]
	    }
	    ict_match_token {
		lappend asm [Tok [lindex $insn 2]]
		lappend asm [Str [lindex $insn 3]]
	    }
	    ict_match_tokrange {
		lappend asm [Tok [lindex $insn 2]]
		lappend asm [Tok [lindex $insn 3]]
		lappend asm [Str [lindex $insn 4]]
	    }
	    ict_match_tokclass {
		set ccode [lindex $insn 2]
		if {![info exists tccode($ccode)]} {
		    return -code error "Bad instruction \"$insn\", unknown class code \"$ccode\""
		}
		lappend asm $tccode($ccode)
		lappend asm [Str [lindex $insn 3]]

	    }
	    inc_restore {
		set jmpto [lindex $insn 2]
		if {![info exists jmp($jmpto)]} {
		    return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
		}
		lappend asm $jmp($jmpto)
		lappend asm [Str [lindex $insn 3]]
	    }
	    icf_ntcall  -
	    icf_jalways -
	    icf_jok     -
	    icf_jfail   {
		set jmpto [lindex $insn 2]
		if {![info exists jmp($jmpto)]} {
		    return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
		}
		lappend asm $jmp($jmpto)
	    }
	}
    }

    return [list $asm $pool [array get tokmap]]
}

proc ::grammar::me::cpu::core::new {code} {
    # The code generating the state is drawn out to integrate a
    # specification of how the machine state is mapped to Tcl as well.

    Validate $code

    set     state {}   ; # The state is representend as a Tcl list.
    # ### ### ### ######### ######### #########
    lappend state $code ; # [_0] code  - list  - code to run (-)
    lappend state 0     ; # [_1] pc    - int   - Program counter
    lappend state 0     ; # [_2] halt  - bool  - Flag, set (internal) when machine was halted (icf_halt).
    lappend state 0     ; # [_3] eof   - bool  - Flag, set (external) when where will be no more input.
    lappend state {}    ; # [_4] tc    - list  - Terminal cache, pending and processed tokens.
    lappend state -1    ; # [_5] cl    - int   - Current Location
    lappend state {}    ; # [_6] ct    - token - Current Character
    lappend state 0     ; # [_7] ok    - bool  - Match Status
    lappend state {}    ; # [_8] sv    - any   - Semantic Value
    lappend state {}    ; # [_9] er    - list  - Error status (*)
    lappend state {}    ; # [10] ls    - list  - Location Stack (x)
    lappend state {}    ; # [11] as    - list  - Ast Stack
    lappend state {}    ; # [12] ms    - list  - Ast Marker Stack
    lappend state {}    ; # [13] es    - list  - Error Stack
    lappend state {}    ; # [14] rs    - list  - Return Stack
    lappend state {}    ; # [15] nc    - dict  - Nonterminal Cache (backtracking)
    # ### ### ### ######### ######### #########

    # tc    = list(token)
    # token = list(str lexeme line col)


    # (-) See manpage of this package for the representation.

    # (*) 2 elements, first is error location, second is list of 
    # ... strings, the error messages. The strings are actually
    # ... represented by references into the pool element of the code.

    # (x) Regarding the various stacks maintained in the state, their
    #     top element is always at the right end, i.e. the last
    #     element in the list representing it.

    return $state
}

proc ::grammar::me::cpu::core::ntok {state} {
    return [llength [lindex $state 4]]
}

proc ::grammar::me::cpu::core::lc {state loc} {
    set tc  [lindex $state 4]
    set loc [INDEX $tc $loc "Illegal location"]
    return [lrange [lindex $tc $loc] 2 3]
    # result = list(line col)
}

proc ::grammar::me::cpu::core::tok {state args} {
    if {[llength $args] > 2} {
	return -code error {wrong # args: should be "grammar::me::cpu::core::tok state ?from ?to??"}
    }
    set tc [lindex $state 4]
    if {[llength $args] == 0} {
	return $tc
    } elseif {[llength $args] == 1} {
	set at [INDEX $tc [lindex $args 0] "Illegal location"]
	return [lrange $tc $at $at]
    } else {
	set from [INDEX $tc [lindex $args 0] "Illegal start location"]
	set to   [INDEX $tc [lindex $args 1] "Illegal end location"]
	if {$from > $to} {
	    return -code error "Illegal empty location range $from .. $to"
	}
	return [lrange $tc $from $to]
    }
    # result = list(token), token = list(str lex line col)
}

proc ::grammar::me::cpu::core::pc {state} {
    return [lindex $state 1]
}

proc ::grammar::me::cpu::core::iseof {state} {
    return [lindex $state 3]
}

proc ::grammar::me::cpu::core::at {state} {
    return [lindex $state 5]
}

proc ::grammar::me::cpu::core::cc {state} {
    return [lindex $state 6]
}

proc ::grammar::me::cpu::core::sv {state} {
    return [lindex $state 8]
}

proc ::grammar::me::cpu::core::ok {state} {
    return [lindex $state 7]
}

proc ::grammar::me::cpu::core::error {state} {
    set er [lindex $state 9]
    if {[llength $er]} {
	foreach {l m} $er break

	set pool [lindex $state 0 1] ; # state ->/0 code ->/1 pool
	set mx   {}
	foreach id $m {
	    lappend mx [lindex $pool $id]
	}
	set er [list $l $mx]
    }
    return $er
}

proc ::grammar::me::cpu::core::lstk {state} {
    return [lindex $state 10]
}

proc ::grammar::me::cpu::core::astk {state} {
    return [lindex $state 11]
}

proc ::grammar::me::cpu::core::mstk {state} {
    return [lindex $state 12]
}

proc ::grammar::me::cpu::core::estk {state} {
    return [lindex $state 13]
}

proc ::grammar::me::cpu::core::rstk {state} {
    return [lindex $state 14]
}

proc ::grammar::me::cpu::core::nc {state} {
    return [lindex $state 15]
}

proc ::grammar::me::cpu::core::ast {state} {
    return [lindex $state 11 end]
}

proc ::grammar::me::cpu::core::halted {state} {
    return [lindex $state 2]
}

proc ::grammar::me::cpu::core::code {state} {
    return [lindex $state 0]
}

proc ::grammar::me::cpu::core::eof {statevar} {
    upvar 1 $statevar state
    lset state 3 1
    return
}

proc ::grammar::me::cpu::core::put {statevar tok lex line col} {
    upvar 1 $statevar state
    if {[lindex $state 3]} {
	return -code error "Cannot add input data after eof"
    }
    set     tc [K [lindex $state 4] [lset state 4 {}]]
    lappend tc [list $tok $lex $line $col]
    lset state 4 $tc
    return
}

proc ::grammar::me::cpu::core::run {statevar {steps -1}} {
    # Execution loop. Should be instrumented for statistics about
    # dynamic instruction frequency. I.e. which instructions are
    # executed the most => put them at the front of the if/switch for
    # quicker selection. I.e. frequency coding of the branches for
    # speed.

    # A C implementation can shimmer the state into a directly
    # accessible data structure. And the asm instructions can shimmer
    # into an integer index upon which we can switch fast.

    variable anum
    variable tclass
    upvar 1 $statevar state
    variable iname ; # For debug output

    # Do nothing for a stopped machine (halt flag set).
    if {[lindex $state 2]} {return $state}

    # Fail if there are no instruction to execute
    if {![llength [lindex $state 0 0]]} {
	# No instructions to execute
	return -code error "No instructions to execute"
    }

    # Unpack state into locally accessible variables
    #        0    1  2    3   4  5  6  7  8  9  10 11 12 13 14 15 16 17 18  19  20
    foreach {code pc halt eof tc cl ct ok sv er ls as ms es rs nc} $state break

    # Unpack match program for easy access as well.
    #        0   1    2
    foreach {asm pool tokmap} $code break

    if 0 {
	puts ________________________
	puts [join [disasm $code] \n]
	puts ________________________
    }

    # Ensure that the unpacked information is not shared
    unset state

    # Internal flags for optimal handling of the nonterminal
    # cache. Avoid multiple unpacking of the dictionary, and avoid
    # repacking if it was not modified.

    set ncunpacked 0
    set ncmodified 0
    set tmunpacked 0

    while {1} {
	# Stop execution if the specified number of instructions have
	# been executed. Ignore if infinity was specified.
	if {$steps == 0} break
	if {$steps > 0} {incr steps -1}

	# Get current instruction ...

	if 0 {puts .$pc:\t$iname([lindex $asm $pc])}
	if 0 {puts -nonewline .$pc:\t$iname([lindex $asm $pc])}

	set insn [lindex $asm $pc] ; incr pc

	# And its arguments ...

	set an [lindex $anum $insn]
	if {$an == 1} {
	    set a [lindex $asm $pc] ; incr pc
	    if 0 {puts \t<$a>}
	} elseif {$an == 2} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	    if 0 {puts \t<$a|$b>}
	} elseif {$an == 3} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	    set c [lindex $asm $pc] ; incr pc
	    if 0 {puts \t<$a|$b|$c>}
	} ;# else {puts ""}

	# Dispatch to implementation of the instruction ...

	# Separate if commands are used for easier ordering of the
	# dispatch. The order of the branches should be frequency
	# coded to have the most frequently used instructions first.

	# ict_advance <a:message>
	if {$insn == 0} {
	    if 0 {puts \t\[$cl|[llength $tc]|$eof\]}
	    incr cl
	    if {$cl < [llength $tc]} {
		if 0 {puts \tConsume}

		set ct [lindex $tc $cl 0]
		set ok 1
		set er {}
	    } elseif {$eof} {
		if 0 {puts \tFail<Eof>}

		# We have no input, and there won't be more coming in
		# either. Fail the advance. We do _not_ stop the match
		# loop, the program has to complete. The failure might
		# be no such, revealed during backtracking. The current
		# location is not rewound automatically, this is the
		# responsibility of any backtracking.

		set er  [list $cl [list $a]]
		set ok  0
	    } else {
		if 0 {puts \tSuspend&Wait}

		# We have no input, stop matching and wait for
		# more. We reset the machine into a state
		# which will restart this instruction when
		# execution resumes.

		incr cl -1
		incr pc -2 ; # code and message argument
		break
	    }
	    if 0 {puts .Next}
	    continue
	}

	# ict_match_token <a:token> <b:message>
	if {$insn == 1} {
	    if {[llength $tokmap]} {
		if {!$tmunpacked} {
		    array set tm $tokmap
		    set tmunpacked 1
		}
		set ok [expr {$a == $tm($ct)}]
	    } else {
		set xch [lindex $pool $a]
		set ok  [expr {$xch eq $ct}]
	    }
	    if {!$ok} {
		set er [list $cl [list $b]]
	    } else {
		set er {}
	    }
	    continue
	}

	# ict_match_tokrange <a:tokstart> <b:tokend> <c:message>
	if {$insn == 2} {
	    if {[llength $tokmap]} {
		if {!$tmunpacked} {
		    array set tm $tokmap
		    set tmunpacked 1
		}
		set x $tm($ct)
		set ok [expr {($a <= $x) && ($x <= $b)}]
	    } else {
		set a [lindex $pool $a]
		set b [lindex $pool $b]
		set ok [expr {
		    ([string compare $a $ct] <= 0) &&
		    ([string compare $ct $b] <= 0)
		}] ; # {}
	    }
	    if {!$ok} {
		set er [list $cl [list $c]]
	    } else {
		set er {}
	    }
	    continue
	}

	# ict_match_tokclass <a:code> <b:message>
	if {$insn == 3} {
	    set strcode [lindex $tclass $a]
	    set ok   [string is $strcode -strict $ct]
	    if {!$ok} {
		set er [list $cl [list $b]]
	    } else {
		set er {}
	    }
	    continue
	}

	# inc_restore <a:branchtarget> <b:nonterminal>
	if {$insn == 4} {
	    set sym [lindex $pool $b]

	    # Unpack the cache dict, only here.
	    # 8.5 - Use dict operations instead.

	    if {!$ncunpacked} {
		array set ncc $nc
		set ncunpacked 1
	    }

	    if {[info exists ncc($cl,$sym)]} {
		foreach {go ok error sv} $ncc($cl,$sym) break

		# Go forward, as the nonterminal matches (or not).
		set cl $go
		set pc $a
	    }
	    continue
	}

	# inc_save <a:nonterminal>
	if {$insn == 5} {
	    set sym [lindex $pool $a]
	    set at  [lindex $ls end]
	    set ls  [lrange $ls 0 end-1]

	    # Unpack, modify, only here.
	    # 8.5 - Use dict operations instead.

	    if {!$ncunpacked} {
		array set ncc $nc
		set ncunpacked 1
	    }

	    set ncc($at,$sym) [list $cl $ok $er $sv]
	    set ncmodified 1
	    continue
	}

	# icf_ntcall <a:branchtarget>
	if {$insn == 6} {
	    lappend rs $pc
	    set     pc $a
	    continue
	}

	# icf_ntreturn
	if {$insn == 7} {
	    set pc [lindex $rs end]
	    set rs [lrange $rs 0 end-1]
	    continue
	}

	# iok_ok
	if {$insn == 8} {
	    set ok 1
	    continue
	}

	# iok_fail
	if {$insn == 9} {
	    set ok 0
	    continue
	}

	# iok_negate
	if {$insn == 10} {
	    set ok [expr {!$ok}]
	    continue
	}

	# icf_jalways <a:branchtarget>
	if {$insn == 11} {
	    set pc $a
	    continue
	}

	# icf_jok <a:branchtarget>
	if {$insn == 12} {
	    if {$ok} {set pc $a}
	    # !ok => pc is already on next instruction.
	    continue
	}

	# icf_jfail <a:branchtarget>
	if {$insn == 13} {
	    if {!$ok} {set pc $a}
	    # ok => pc is already on next instruction.
	    continue
	}

	# icf_halt
	if {$insn == 14} {
	    set halt 1
	    break
	}

	# icl_push
	if {$insn == 15} {
	    lappend ls $cl
	    continue
	}

	# icl_rewind
	if {$insn == 16} {
	    set cl [lindex $ls end]
	    set ls [lrange $ls 0 end-1]
	    continue
	}

	# icl_pop
	if {$insn == 17} {
	    set ls [lrange $ls 0 end-1]
	    continue
	}

	# ier_push
	if {$insn == 18} {
	    lappend es $er
	    continue
	}

	# ier_clear
	if {$insn == 19} {
	    set er {}
	    continue
	}

	# ier_nonterminal <a:nonterminal>
	if {$insn == 20} {
	    if {[llength $er]} {
		set  pos [lindex $ls end]
		incr pos
		set eloc [lindex $er 0]
		if {$eloc == $pos} {
		    set er [list $eloc [list $a]]
		}
	    }
	    continue
	}

	# ier_merge
	if {$insn == 21} {
	    set old [lindex $es end]
	    set es  [lrange $es 0 end-1]

	    # We have either old or current error data, keep it.

	    if {![llength $er]} {
		# No current data, keep old
		set er $old
	    } elseif {[llength $old]} {
		# If one of the errors is further on in the input
		# choose that as the information to propagate.

		foreach {loe msgse} $er  break
		foreach {lon msgsn} $old break

		if {$lon > $loe} {
		    set er $old
		} elseif {$loe == $lon} {
		    # Equal locations, merge the message lists.

		    foreach m $msgsn {lappend msgse $m}
		    set er [list $loe [lsort -uniq $msgse]]
		}
		# else lon < loe - er is better - nothing
	    }
	    # else - !old, but er - nothing

	    continue
	}

	# isv_clear
	if {$insn == 22} {
	    set sv {}
	    continue
	}

	# isv_terminal (implied ias_push)
	if {$insn == 23} {
	    set sv [list {} $cl $cl]
	    lappend as $sv
	    continue
	}

	# isv_nonterminal_leaf <a:nonterminal>
	if {$insn == 24} {
	    set pos [lindex $ls end]
	    set sv  [list $a $pos $cl]
	    continue
	}

	# isv_nonterminal_range <a:nonterminal>
	if {$insn == 25} {
	    set pos [lindex $ls end]
	    set sv  [list $a $pos $cl [list {} $pos $cl]]
	    continue
	}

	# isv_nonterminal_reduce <a:nonterminal>
	if {$insn == 26} {
	    set pos [lindex $ls end]
	    if {[llength $ms]} {
		set  mrk [lindex $ms end]
		incr mrk
	    } else {
		set mrk 0
	    }
	    set sv [lrange $as $mrk end]
	    set sv [linsert $sv 0 $a $pos $cl]
	    continue
	}

	# ias_push
	if {$insn == 27} {
	    lappend as $sv
	    continue
	}

	# ias_mark
	if {$insn == 28} {
	    set  mark [llength $as]
	    incr mark -1
	    lappend ms $mark
	    continue
	}

	# ias_mrewind
	if {$insn == 29} {
	    set mark [lindex $ms end]
	    set ms   [lrange $ms 0 end-1]
	    set as   [lrange $as 0 $mark]
	    continue
	}

	# ias_mpop
	if {$insn == 30} {
	    set ms [lrange $ms 0 end-1]
	    continue
	}

	return -code error "Illegal instruction $insn"
    }

    # Repack a modified cache dictionary, then repack and store the
    # updated state value.

    if 0 {puts .Repackage\ state}

    if {$ncmodified} {set nc [array get ncc]}
    set state [list $code $pc $halt $eof $tc $cl $ct $ok $sv $er $ls $as $ms $es $rs $nc]
    return
}

namespace eval grammar::me::cpu::core {
    # Map between class codes and names
    variable tclass {}
    variable tccode

    foreach {x code} {
	0 alnum
	1 alpha
	2 digit
	3 xdigit
	4 punct
	5 space
    } {
	lappend tclass $code
	set tccode($code) $x
    }

    # Number of arguments per ME instruction.
    # Indexed by instruction code.
    variable anum {}

    # Mapping between instruction codes and names.
    variable iname

    foreach {z insn x notes} {
	0  ict_advance            1	{-- TESTED}
	1  ict_match_token        2	{-- TESTED}
	2  ict_match_tokrange     3	{-- TESTED}
	3  ict_match_tokclass     2	{-- TESTED}
	4  inc_restore            2	{-- TESTED}
	5  inc_save               1	{-- TESTED}
	6  icf_ntcall             1	{-- TESTED}
	7  icf_ntreturn           0	{-- TESTED}
	8  iok_ok                 0	{-- TESTED}
	9  iok_fail               0	{-- TESTED}
	10 iok_negate             0	{-- TESTED}
	11 icf_jalways            1	{-- TESTED}
	12 icf_jok                1	{-- TESTED}
	13 icf_jfail              1	{-- TESTED}
	14 icf_halt               0	{-- TESTED}
	15 icl_push               0	{-- TESTED}
	16 icl_rewind             0	{-- TESTED}
	17 icl_pop                0	{-- TESTED}
	18 ier_push               0	{-- TESTED}
	19 ier_clear              0	{-- TESTED}
	20 ier_nonterminal        1	{-- TESTED}
	21 ier_merge              0	{-- TESTED}
	22 isv_clear              0	{-- TESTED}
	23 isv_terminal           0	{-- TESTED}
	24 isv_nonterminal_leaf   1	{-- TESTED}
	25 isv_nonterminal_range  1	{-- TESTED}
	26 isv_nonterminal_reduce 1	{-- TESTED}
	27 ias_push               0	{-- TESTED}
	28 ias_mark               0	{-- TESTED}
	29 ias_mrewind            0	{-- TESTED}
	30 ias_mpop               0	{-- TESTED}
    } {
	lappend anum $x
	set iname($z) $insn
	set iname($insn) $z
    }
}

# ### ### ### ######### ######### #########
## Helper commands ((Dis)Assembler, runtime).

proc ::grammar::me::cpu::core::INDEX {list i label} {
    if {$i eq "end"} {
	set i [expr {[llength $list] - 1}]
    } elseif {[regexp {^end-([0-9]+)$} $i -> n]} {
	set i [expr {[llength $list] - $n -1}]
    }
    if {
	![string is integer -strict $i] ||
	($i < 0) ||
	($i >= [llength $list])
    } {
	return -code error "$label $i"
    }
    return $i
}

proc ::grammar::me::cpu::core::K {x y} {set x}

proc ::grammar::me::cpu::core::Str {str} {
    upvar 1 pool pool poolh poolh
    if {![info exists poolh($str)]} {
	set poolh($str) [llength $pool]
	lappend pool $str
    }
    return $poolh($str)
}

proc ::grammar::me::cpu::core::Tok {str} {
    upvar 1 tokmap tokmap ord ord plain plain

    if {[regexp {^([^:]+):(.+)$} $str -> id name]} {
	if {$plain} {
	    return -code error "Bad assembly, mixing plain and ranked tokens"
	}
	if {[info exists ord($id)]} {
	    return -code error "Bad assembly, non-total ordering for $name and $ord($id), at rank $id"
	}
	set ord($id) $name
	set tokmap($name) $id

	return $id
    } else {
	if {[array size ord]} {
	    return -code error "Bad assembly, mixing plain and ranked tokens"
	}
	set plain 1
	return [uplevel 1 [list Str $str]]
    }
}

proc ::grammar::me::cpu::core::Validate {code {ovar {}} {tvar {}} {jvar {}}} {
    variable anum
    variable iname

    # Basic validation of structure ...

    if {[llength $code] != 3} {
	return -code error "Bad length"
    }

    foreach {asm pool tokmap} $code break

    if {[llength $tokmap] % 2 == 1} {
	return -code error "Bad tokmap, expected a dictionary"
    }

    array set ord {}
    if {[llength $tokmap] > 0} {
	foreach {tok rank} $tokmap {
	    if {[info exists ord($rank)]} {
		return -code error "Bad tokmap, non-total ordering for $tok and $ord($rank), at rank $rank"
	    }
	    set ord($rank) $tok
	}
    }

    # Basic validation of ME code: Valid instructions, collect valid
    # branch target indices

    array set target {}

    set pc 0
    set pcend   [llength $asm]
    set poolend [llength $pool]

    while {$pc < $pcend} {
	set target($pc) .

	set insn [lindex $asm $pc]
	if {($insn < 0) || ($insn > 30)} {
	    return -code error "Invalid instruction $insn at PC $pc"
	}

	incr pc
	incr pc [lindex $anum $insn]
    }

    if {$pc > $pcend} {
	return -code error "Bad program, last instruction $insn ($iname($insn)) is truncated"
    }

    # Validation of ME instruction arguments (pool references, branch
    # targets, ...)

    if {$jvar ne ""} {
	upvar 1 $jvar jmp
    }
    array set jmp {}

    set pc 0
    while {$pc < $pcend} {
	set base $pc
	set insn [lindex $asm $pc] ; incr pc
	set an   [lindex $anum $insn]

	if {$an == 1} {
	    set a [lindex $asm $pc] ; incr pc
	} elseif {$an == 2} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	} elseif {$an == 3} {
	    set a [lindex $asm $pc] ; incr pc
	    set b [lindex $asm $pc] ; incr pc
	    set c [lindex $asm $pc] ; incr pc
	}

	switch -exact $insn {
	    0 - 5 - 20 - 24 - 25 - 26 -
	    a/string {
		if {($a < 0) || ($a >= $poolend)} {
		    return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
		}
	    }
	    1 {
		# a/tok b/string
		if {![llength $tokmap]} {
		    if {($a < 0) || ($a >= $poolend)} {
			return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
		    }
		} else {
		    if {![info exists ord($a)]} {
			return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
		    }
		}
		if {($b < 0) || ($b >= $poolend)} {
		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
		}
	    }
	    2 {
		# a/tokstart b/tokend c/string

		if {![llength $tokmap]} {
		    # a = b = string references.
		    if {($a < 0) || ($a >= $poolend)} {
			return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
		    }
		    if {($b < 0) || ($b >= $poolend)} {
			return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
		    }
		    if {$a == $b} {
			return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
		    }
		    if {[string compare [lindex $pool $a] [lindex $pool $b]] > 0} {
			return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
		    }
		} else {
		    # tokmap defined: a = b = order rank.
		    if {![info exists ord($a)]} {
			return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
		    }
		    if {![info exists ord($b)]} {
			return -code error "Invalid token rank $b for instruction $insn ($iname($insn)) at $base"
		    }
		    if {$a == $b} {
			return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
		    }
		    if {$a > $b} {
			return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
		    }
		}
		if {($c < 0) || ($c >= $poolend)} {
		    return -code error "Invalid string reference $c for instruction $insn ($iname($insn)) at $base"
		}
	    }
	    3 {
		# a/class(0-5) b/string
		if {($a < 0) || ($a > 5)} {
		    return -code error "Invalid token-class $a for instruction $insn ($iname($insn)) at $base"
		}
		if {($b < 0) || ($b >= $poolend)} {
		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
		}
	    }
	    4 {
		# a/branch b/string
		if {![info exists target($a)]} {
		    return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
		} else {
		    set jmp($a) .
		}
		if {($b < 0) || ($b >= $poolend)} {
		    return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
		}
	    }
	    6 - 11 - 12 - 13 -
	    a/branch {
		if {![info exists target($a)]} {
		    return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
		} else {
		    set jmp($base) $a
		}
	    }
	    default {}
	}
    }

    # All checks passed, code is deemed good enough.
    # Caller may have asked for some of the collected
    # information.

    if {$ovar ne ""} {
	upvar 1 $ovar o
	array set o [array get ord]
    }
    if {$tvar ne ""} {
	upvar 1 $tvar t
	array set t [array get target]
    }
    return
}

# ### ### ### ######### ######### #########
## Ready

package provide grammar::me::cpu::core 0.2