Check-in [67b989853b]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:OOPS! Didn't add the new 'jumpthread.tcl' module! Remove the old 'nodesplit.tcl', and remove the 'renameTemps' pass since 'nodesplit' was the only thing that ever depended on it.
Timelines: family | ancestors | descendants | both | notworking | kbk-jumpthread
Files: files | file ages | folders
SHA3-256: 67b989853b4d87b43a290b5f999d59bce09f9f88602e076e329f3df7a2625de3
User & Date: kbk 2018-12-10 04:09:31.416
Context
2018-12-10
04:51
Remove vestiges of the old node splitter from 'inline.tcl'. Make console dribble in 'jumpthread.tcl' contingent on debug-jumpthread. check-in: fd2ea3e6f1 user: kbk tags: notworking, kbk-jumpthread
04:09
OOPS! Didn't add the new 'jumpthread.tcl' module! Remove the old 'nodesplit.tcl', and remove the 'renameTemps' pass since 'nodesplit' was the only thing that ever depended on it. check-in: 67b989853b user: kbk tags: notworking, kbk-jumpthread
01:34
Calculation of anticipable tests for jump threading check-in: f138663832 user: kbk tags: notworking, kbk-jumpthread
Changes
Unified Diff Ignore Whitespace Patch
Added quadcode/jumpthread.tcl.


















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
# jumpthread.tcl --
#
#	Compiler passes to perform jump threading on quadcode.
#
# Copyright (c) 2018 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------

# Jump threading is a surprisingly important optimization in the
# processing of quadcode. The reason is that it allows for separation
# of paths that would otherwise require boxing and unboxing of values
# at every access.  Consider, for instance, a procedure like:
#
#   proc processRange {a b} {
#       for {set i $a} {$i < $b} {incr i} {
#           doSomethingWith $i
#       }
#   }
#
# At the entry to the [for] loop, nothing is known about the values of
# $a and $b.  The comparison {$i < $b} will therefore be expensive,
# requiring that the types of both $i and $b be identified and either
# string or numeric comparision be performed according to the
# type. Moreover, on return to the top of the loop - after [incr i]
# has been guarantted to produce an integer - in order to keep types
# consistent at the phi operation at the type of the loop, $i will be
# widened to a string again, forcing it to be boxed in a Tcl object.
#
# While the particular case of $i could be addressed by 'loop
# peeling', duplicating the loop body so that the problematic first
# iteration executes separately from the others, the comparison
# {$i < $b} is not helped by loop peeling. On each trip through the loop,
# $b's type will be checked, and its value (which is almost certain
# to be an integer) will be unboxed.
#
# Jump threading addresses this issue by splitting the path from the
# detection of the type of $b to the next use of $b, so that numeric
# and non-numeric values for $b will take separate paths throughout
# the code.
#
# The drawback to jump threading is that it can possibly result in a
# combinatorial explosion in code volume. For this reason, there need
# to be safety checks and triage to only the most promising
# opportunities.
#
#
# Most of the algorithms used in this module derive indirectly from:
#
# [Prie17] Priesner, Joachim. 'Generalized jump threading in libFIRM."
# Masterarbeit, Fakultät für Informatik, Institut für
# Programmstrukturen und Dantenorganisation (IPD), Karlsruher Institut
# für Technologie (January 2017).
# https://pp.ipd.kit.edu/uploads/publikationen/priesner17masterarbeit.pdf
#
# Priesner's work, however, deals chiefly with threading of
# conditional branches and conditions involving Presburger arithmetic,
# rather than type assertions surrounding values in a dynamic
# language, so a fair amount of rethinking is present here. Instead of
# considering a threading opportunity as a sequence of basic blocks
# (with a conditional jump at the penultimate block), the logic here
# considers a threading opportunity in terms of a sequence of
# operations (reduced to a walk in the flowgraph) from an assignment
# to a value that gives it a known type (possibly this can be expanded
# to other constraints) to a use of the value that can be deleted from
# the program if a given constraint is satisfied. The basic data flow
# analysis, however - accumulate anticipated decisions from back to
# front in the program, and then accumulate threading opportunities
# from front to back - follows the general ideas in Priesner's thesis.

# Map from instructions to the types that trigger their removal.
# An instruction is removable if its sole operand matches the
# type expression 'is $TYPE' or 'isnot $TYPE'

namespace eval quadcode {

    # jt_removable carries the instructions that jump threading is trying
    # to let the optimizer rewrite, together with they type conditions
    # that the instructions are testing.

    variable jt_removable

    proc init {} {

        variable jt_removable
        namespace upvar ::quadcode::dataType \
            ARRAY ARRAY CONST0 CONST0 FAIL FAIL IMPURE IMPURE NEXIST NEXIST

        dict set jt_removable "arrayExists" \
            [list [list is $ARRAY] [list isnot $ARRAY]]

        dict set jt_removable "exists" \
            [list [list is $NEXIST] [list isnot $NEXIST]]

        dict set jt_removable "initArrayIfNotExists" \
            [dict get $jt_removable "exists"]

        dict set jt_removable "initIfNotExists" \
            [dict get $jt_removable "exists"]

        dict set jt_removable "jumpFalse" \
            [list [list is $CONST0] [list isnot $CONST0]]

        dict set jt_removable "jumpMaybe" \
            [list [list is $FAIL] [list isnot $FAIL]]

        dict set jt_removable "jumpTrue" \
            [dict get $jt_removable "jumpFalse"]

        dict set jt_removable "purify" \
            [list [list isnot $IMPURE]]

        rename init {}
    }

    init
}

# quadcode::transformer method jumpthread --
#
#	Performs jump threading on a quadcode sequence.
#
# Results:
#
#	Returns 1 if the sequence was modified, 0 otherwise.
#
# Side effects:
#
#	Performs nearly arbitrary surgery on the sequence. While ud-
#	and du-chains are kept up to date, and critical edges will be
#	split, the dominance tree will need to be rebuilt, and the
#	resulting program may contain unreachable code, basic blocks
#	subject to coalescence, constants that need folding, redundant
#	conditional jumps, chains of copy operations, and similar
#	messes that need tidying. Type analysis will also need to be
#	repeated.

oo::define quadcode::transformer method jumpthread {} {

    my debug-jumpthread {
	puts "Before jump threading"
	my dump-bb
    }

    # Unpack phi operations into the jt_phis, which is a multilevel
    # dictionary. [dict get $jt_phis $b $v $p], where $b is a basic block
    # number, $v is a variable and $p is the basic block number of a
    # predecessor of $b, identifies the data source in $p that corresponds to
    # variable $v in $b.

    my jt_unpackPhis

    # Identify sets of conditions that may benefit from threading.  The
    # conditionals appear in the dictionary jt_condition and are identified by
    # number. The anticipability of the conditions is tracked in jt_antin,
    # which records what conditions are anticipable at the start of each basic
    # block.

    my jt_backward

    # Identify which subsets of the conditions are reachable on specific
    # control flow paths, so that blocks can be replicated to have known
    # entry conditions. Also report the (up to two) successors for each
    # variant block

    my jt_forward

    # TODO: Once all the variants have been listed, if any block has more than
    #       one variant, deconstruct SSA.  Replicate the blocks into variants,
    #       redirecting their exits as needed (and tracking preds). Sort the
    #       blocks.  Reconstruct SSA, solve ud- and du-chains, propagate
    #       copies, remove unreachable code, and recalculate bbidom/bblevel.
    #	    (May want to inspect the result to see whether another try at
    #	    loop inversion might help.)

    my debug-jumpthread {
        puts "NOT DONE!"
    }

    # Clean up the working storage

    my jt_cleanup

    return 0
}

# quadcode::transformer method jt_unpackPhis --
#
#	Unpacks phi operations for fast lookup when doing jump threading.
#
# Results:
#
#	None.
#
# Side effects:
#
#	Creates a multilevel dictionary $jt_phis. If value $v is the result of
#	a phi in basic block $b, and $p is a predecessor block of $b, then
#	[dict get $jt_phis $b $v $p] will give the corresponding value in $p.

oo::define quadcode::transformer method jt_unpackPhis {} {

    my variable jt_phis

    set jt_phis {}

    set b -1
    foreach bb $bbcontent {
	incr b

	set pc -1
	foreach q $bb {
	    incr pc

	    if {[lindex $q 0 0] ne "phi"} break

	    set v [lindex $q 1]
	    foreach {source w} [lrange $q 2 end] {
		set p [lindex $source 1]
		dict set jt_phis $b $v $p $w
	    }
	}
    }

    return

}

# quadcode::transformer method jt_backward --
#
#	Perform one or more passes of backward data flow analysis
#	in support of jump threading.
#
# Results:
#	None.
#
# Side effects:
#
#	Constructs the list, jt_antin, indexed by basic block number,
#	containing dictionaries.  The dictionaries describe the conditions
#	that will inform jump threading downstream of the entry to the basic
#	blocks. The dictionaries have two levels. The first level key gives
#	the name of a value in the quadcode, and the second gives a condition
#	on that value's type.  It is a combination of
#	

oo::define quadcode::transformer method jt_backward {} {

    namespace upvar ::quadcode jt_removable jt_removable

    my variable jt_antin


    set jt_antin [lrepeat [llength $bbcontent] {}]

    set changed 1
    while {$changed} {
        set changed 0

        my debug-jumpthread {
            puts "Start a pass of anticipability for jump threading"
        }

        foreach b [my bbrorder] {
            set bb [lindex $bbcontent $b]

            my debug-jumpthread {
                puts "bb $b:"
            }
            # Construct the conditions anticipable on output. It is
            # possible that the conditions will refer to literals,
            # in which case any possible threading opportunity will begin
            # on the exit from this block to the successor.
            set antout {}
            foreach s [my bbsucc $b] {
                dict for {w conds} [lindex $jt_antin $s] {
                    set v [my jt_translate_phi $s $w $b]
                    dict for {c -} $conds {
                        dict set antout $v $c {}
                    }
                }
            }

            # Construct the conditions anticipable on input. Begin by
            # filtering any constant conditions out of the output conditions.

            set antin {}
            dict for {v conds} $antout {
                if {[lindex $v 0] in {"temp" "var"}} {
                    dict set antin $v $conds
                }
            }

            # Run backward through the instructions in the current block.
            # Remove any conditions that depend on instructions in the
            # block. Add any conditions that inform the removal of instructions
            # in the block.

            set pc [llength $bb]
            while {$pc > 0} {
                incr pc -1
                set q [lindex $bb $pc]
                lassign $q opcode dest source1
                set op [lindex $opcode 0]
                switch -exact -- $op {

                    phi {

                        # At a phi, we're done with the content of this
                        # block.
                        break
                    }

                    copy {

                        # For a copy, the conditions on the destination
                        # turn into conditions on the source
                        set conds {}
                        if {[dict exists $antin $dest]} {
                            foreach {c -} [dict get $antin $dest] {
                                dict set antin $source1 $c {}
                            }
                            dict unset antin $dest
                        }
                    }

                    instanceOf {

                        # For instanceOf, the conditions are that the
                        # value is definitely/is definitely not an
                        # instance of the given type.
                        set wanted [lindex $opcode 1]
                        dict set antin $source1 [list is $wanted] {}
                        dict set antin $source1 [list isnot $wanted] {}
                        dict unset antin $dest
                    }

                    default {

                        # Otherwise, if this is an instruction that might
                        # be removed depending on the type of its operand,
                        # record what that type is.
                        if {[dict exists $jt_removable $op]} {
                            foreach c [dict get $jt_removable $op] {
                                dict set antin $source1 $c {}
                            }
                        }
                        dict unset antin $dest
                    }
                }
            }

            if {$antin ne [lindex $jt_antin $b]} {
                lset jt_antin $b $antin
                set changed 1
                my debug-jumpthread {
                    puts "  $b: anticipable conditions:"
                    dict for {v conds} $antin {
                        dict for {c -} $conds {
                            lassign $c what type
                            puts "    $v $what $type ([nameOfType $type])"
                        }
                    }
                }
            }
        }
    }

    return
}

# quadcode::transformer method jt_translate_phi --
#
#	Given a variable in a successor block, finds out what the
#	corresponding variable in the predecessor block is.
#
# Parameters:
#	b - Successor block
#	v - Variable name
#	p - Predecessor block
#
# Results:
#	Returns the name of the corresponding variable in the predecessor

oo::define quadcode::transformer method jt_translate_phi {b v p} {

    my variable jt_phis
    
    if {[dict exists $jt_phis $b $v $p]} {
        return [dict get $jt_phis $b $v $p]
    } else {
        return $v
    }
}

# quadcode::translate method jt_forward --
#
#	Works through the forward propagation of knowledge about the
#	program to determine what sets of conditions should be assumed
#	in basic blocks prior to attempting to split them.
#
# Results:
#	None.
#
# Side effects:
#
#	A list of dictionaries, 'jt_variants' is created. The list is
#	indexed by basic block number. The list members are dictionaries,
#	whose keys are bit vectors identifying which of the anticipibale
#	conditions is true on entry to the block, and whose values are
#	immaterial on return from this procedure.

oo::define quadcode::transformer method jt_forward {} {

    # jt_stack is a list of alternating basic block number and
    # condition bit vector, used to track work that still needs to be
    # done.
    my variable jt_stack
    my variable jt_variants

    # Initially, the work list contains just the entry node, and
    # nothing is known on entry to it (there shouldn't
    # be any anticipated conditions there!)
    set jt_stack [list 0 0]
    set jt_variants [lrepeat [llength $bbcontent] {}]
    lset jt_variants [dict create 0 -1]
    
    # Pop entries off the worklist and process them

    while {[llength $jt_stack] > 0} {
        set b [lindex $jt_stack end-1]
        set condMask [lindex $jt_stack end]
        set jt_stack [lreplace $jt_stack[set jt_stack ""] end-1 end]

        my jt_forward_worker $b $condMask
    }

    return
}

# quadcode::transformer method jt_forward_worker --
#
#	Performs forward jump threading analysis through one basic
#	block, propagating facts into the successors.
#
# Parameters:
#	b - Basic block being analyzed
#	mask - Mask identifying the conditions that are promised on
#	       entry to the block.
#
# Results:
#	None.
#
# Side effects:
#	For each successor to the block, propagates the promised
#	conditions forward into the successor. If the successor has
#	a set of conditions that has not yet been visited, adds it
#	to 'jt_variants' and stacks it for processing.

oo::define quadcode::transformer method jt_forward_worker {b mask} {

    my variable jt_antin
    set antin [lindex $jt_antin $b]
    set mask_expanded {}

    my debug-jumpthread {
        puts "  bb $b"
    }

    # Expand the mask to the list of assertions.
    
    set asserted {}
    set bit 1
    dict for {v conds} $antin {
        dicr for {c -} $conds {
            if {$mask & $bit} {
                dict set asserted $v $c {}
            }
            set bit [expr {$bit << 1}]
        }
    }
    my debug-jumpthread {
        puts "    asserted on entry:"
        dict for {v conds} $asserted {
            dict for {c -} $conds {
                puts "        $v $c"
            }
        }
    }
    
    # Walk through the quads of the block, applying the assertions
    # to the types of the results, accumulating a private set of
    # types.

    set localtypes {}
    set bb [lindex $bbcontent $b]
    set pc -1
    foreach q $bb {
        incr pc

        lassign $q opcode result operand1
        set op [lindex $opcode 0]
        
        # Disregard quads that do not yield a result
        if {[lindex $result 0] ni {"temp" "var"}} {
            continue
        }

        # Narrow the result type to conform with any assertions.
        set ty [::quadcode::typeOfOperand $types $result]
        if {$op eq "copy" && [dict exists $asserted $operand1]} {
            dict set asserted $result [dict get $asserted $operand1]
        }
        if {[dict exists $asserted $result]} {
            set ty [my jt_applyAssertions $ty [dict get $asserted $result]]
        }

        puts "    type of $result is [format %#x $ty]\
                  ([quadcode::nameOfType $ty])"
        dict set localtypes $result $ty

    }

    # TODO - Look at and phi-translate the ANTIN for each successor block.
    #        Compare against first localtypes, then typeOfOperand, to
    #	     get the type of the input. Find out which constraints the
    #        computed type satisfies, and make a mask for them. Queue the
    #        correct variant of the successor block.
    
    
}

# quadcode::transformer method jt_applyAssertions --
#
#	Apply assertions about a value to its type descriptor to produce
#	a narrowed type
#
# Parameters:
#	ty - Type to narrow
#	as - Assertions to apply
#
# Results:
#	Returns the narrowed type.

oo::define quadcode::transformer method jt_applyAssertions {ty as} {

    namespace upvar quadcode::dataTypes \
        ARRAY ARRAY FAIL FAIL IMPURE IMPURE NEXIST NEXIST

    dict for {c -} $as {
        lassign $c kind type
        switch -exact -- $kind {
            is {
                set mask $type
            }
            isnot {
                set mask [quadcode::dataTypes::allbut $type]
            }
        }
        if {$type & ~($ARRAY | $FAIL | $IMPURE | $NEXIST)} {
            set mask [expr {$mask &~ $IMPURE}]
        }
        set ty [expr {$ty & $mask}]
            
    }

    return $ty

}

# quadcode::transformer method jt_cleanup --
#
#	Cleans up working storage after the jump threading pass.
#
# Results:
#	None.

oo::define quadcode::transformer method jt_cleanup {} {

    my variable jt_antin
    my variable jt_phis
    my variable jt_stack
    my variable jt_variants

    unset -nocomplain jt_antin
    unset -nocomplain jt_phis
    unset -nocomplain jt_stack
    unset -nocomplain jt_variants

    return
}

# Local Variables:
# mode: tcl
# fill-column: 78
# auto-fill-function: nil
# buffer-file-coding-system: utf-8-unix
# indent-tabs-mode: nil
# End:
Deleted quadcode/nodesplit.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
# nodesplit.tcl --
#
#	Code to update quadcode sequences by node splitting - to attempt
#	to isolate paths that operate on different data types, particularly
#	within loops.
#
# Copyright (c) 2017 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------

#-----------------------------------------------------------------------------
#
# Why do we want to do node splitting on quadcode sequences, and what do
# we expect the results to be?
#
# A program like:
#
#	proc x {a b c} {
#	    set x 0
#	    for {set i $a} {$i < $b} {incr i $c} {
#               set x [expr {$x + $i}]
#           }
#	    return $x
#       }
#
# gives rise to performance trouble at runtime. It turns into code like
# the following. For clarity, error handling code is omitted, and the
# dummy blocks that result from critical edge splitting (and eventually
# acquire type widening and memory management code) also are not shown.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: b2 := phi(b1{1}, b3{7})    // STRING <- STRING x IMPURE NUMERIC
#    c2 := phi(c1{1}, c7{7})	// STRING <- STRING x IMPURE NUMERIC
#    i2 := phi(a1{1}, i7{7})    // STRING <- STRING x NUMERIC
#    x2 := phi(0{1}, x5a{7})	// NUMERIC <- ZERO x NUMERIC
#    if i2 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(i2)	// IMPURE NUMERIC <- STRING
#    if b2 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b2)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t3 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x2)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i3		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c2 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c2)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 2
#
# 7: return x2			// <- NUMERIC
#
#-----------------------------------------------------------------------------
#
# There are no fewer than three string-to-numeric conversions, all
# inside the loop. What's more, the three corresponding phi operations promote
# numerics back to strings. What we need is some way to make the string
# conversions happen on only the first and last iterations.
#
# The way we approach this is to do successive block splitting. Whenever
# we encounter values of incompatible types (defined as being a STRING
# and a non-STRING, or an IMPURE object and a pure one, or objects of
# two non-STRING types that will combine to STRING), we know that we likely
# can improve code by splitting the block. This process will unroll at least
# part of the loop, hoisting out the data conversions.
#
# Note that if we are splitting a multi-exit block, we must introduce
# additional dummy blocks because each exit will become a critical edge.
#
# Generally speaking, splitting blocks will give rise to many more
# opportunities to split. We continue until nothing can be split further.
# (There may be a need for a safety check to avoid infinite splits in
# certain pathological cases.)
#
# Let's see what happens as we do this. After one split, we see:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: b3:= phi(b1{2}, b4{8})    // STRING <- STRING x IMPURE NUMERIC
#    c3 := phi(c1{2}, c6{8})	// STRING <- STRING x IMPURE NUMERIC
#    i3 := phi(a1{2}, i6{8})    // STRING <- STRING x NUMERIC
#    x3 := phi(0{2}, x5a{8})	// NUMERIC <- ZERO x NUMERIC
#    i3a := impure numeric(i3)	// IMPURE NUMERIC <- STRING
#    if b3 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b3)	// IMPURE NUMERIC <- STRING
#    t4 := (i3a < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x3)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i3a		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c3 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c3)	// IMPURE NUMERIC <- STRING
#    i6 := i3a + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: return x3			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 3
#
#-----------------------------------------------------------------------------
#
# (There will also be phi's at the start of 'error0'. All error handling is
# omitted for clarity.)
#
# Note that the test in block 8 will always be false, and the existing
# optimizations in 'doTypeChecks' will eliminate it, and the block. But I
# defer reapplying the existing optimizations until all block splitting is
# complete.
#
# Block 3 now becomes a candidate for splitting. After the split, we have:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := phi(b1{3}, b4a{9})   // STRING <- STRING x IMPURE NUMERIC
#    c4 := phi(c1{3}, c7{9})	// STRING <- STRING x IMPURE NUMERIC
#    i4 := phi(i3{3}, i9{9})    // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
#    x4 := phi(0{3}, x6a{9})	// NUMERIC <- ZERO x NUMERIC
#    b4a := impure numeric(b4)	// IMPURE NUMERIC <- STRING
#    t4 := (i4 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 = impure numeric(x4)	// IMPURE NUMERIC <- STRING
#    x5a := x5 + i4		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c4 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c4)	// IMPURE NUMERIC <- STRING
#    i6 := i4 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: return x4			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 10
#
# 9: i9 = impure numeric(i7)         // NUMERIC <- NUMERIC
#    if b4a is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 4
#-----------------------------------------------------------------------------
#
# The whole of block 9 consists of redundant operations, which will be
# eliminated. More importantly, the test on a1 has been moved out of
# the loop and will be performed only once. We keep on splitting, hitting
# block 4 next. Phi operations are needed in blocks 5 and 7.
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: b5 := phi(b4{4}, b10{10})  // STRING <- STRING x IMPURE NUMERIC
#    c5 := phi(c1{4}, c7{10})	// STRING <- STRING x IMPURE NUMERIC
#    i5 := phi(i3{4}, i9{10})   // IMPURE NUMERIC <- IMPURE NUMERIC x NUMERIC
#    x5 := phi(0{4}, x6a{10})	// NUMERIC <- ZERO x NUMERIC
#    x5a := x5 + i5		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    if c5 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c5)	// IMPURE NUMERIC <- STRING
#    i6 := i5 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5a{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: if i6 is not numeric goto error0 // <- IMPURE NUMERIC
#    goto 9
#
# 9: i9 = impure numeric(i6)         // NUMERIC <- NUMERIC
#    if b5 is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 10
#
# 10: b10 := impure numeric(b5)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     t10 := (i6a < b5)            // ZEROONE <- NUMERIC x IMPURE NUMERIC
#     if t10 is false goto 7
#     goto 5
#
#-----------------------------------------------------------------------------
#
# Another expensive type conversion moved out of the loop, and block 11
# begins with an instruction that will be eliminated in optimization.
# Onward to blocks 5 and 6!
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: x5 := 0 + i3		// NUMERIC <- ZERO x IMPURE NUMERIC
#    if c1 is not numeric goto error3 // <- STRING
#    goto 6
#
# 6: c6 := impure numeric(c1)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: b6 := phi(b4{6}, b10{12})
#                        // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    c8 := phi(c6{6}, c12{12})	// IMPURE NUMERIC x IMPURE NUMERIC
#                        // IMPURE NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    i8 := phi(i6{6}, i9{11})   //  NUMERIC <-  NUMERIC x NUMERIC
#    x8 := phi(x5{5}, x11{11})	// NUMERIC <- ZERO x NUMERIC
#    if i8 is not numeric goto error0 // <- NUMERIC
#    goto 9
#
# 9: i9 = impure numeric(i6a)   // NUMERIC <- NUMERIC
#    if b6 is not numeric goto error1 // <- IMPURE NUMERIC
#    goto 10
#
# 10: b10 := impure numeric(b6)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     t10 := (i6a < b6)         // ZEROONE <- NUMERIC x IMPURE NUMERIC
#     if t10 is false goto 7
#     goto 11
#
# 11: x11 := x8 + i8		// NUMERIC <- NUMERIC x NUMERIC
#     if c6a is not numeric goto error3 // <- IMPURE NUMERIC
#     goto 12
#
# 12: c12 = impure numeric(c8)	// IMPURE NUMERIC <- IMPURE NUMERIC
#     i12 := i8 + c8		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#     goto 8
#
#-----------------------------------------------------------------------------
#
# There's nothing left to split, because now all the data types at phi's
# are consistent, but there are a lot of tautologies to eliminate,
# useless type conversions to discard, and similar rubbish. After running
# the optimizations from other modules, the code will look something like:
#
#-----------------------------------------------------------------------------
#
# 1: a1 := args[0]		// STRING <- STRING
#    b1 := args[1]		// STRING <- STRING
#    c1 := args[2]              // STRING <- STRING
#
# 2: if a1 is not numeric goto error0 // <- STRING
#    goto 3
#
# 3: i3 := impure numeric(a1)	// IMPURE NUMERIC <- STRING
#    if b1 is not numeric goto error1 // <- STRING
#    goto 4
#
# 4: b4 := impure numeric(b1)	// IMPURE NUMERIC <- STRING
#    t4 := (i3 < b4)            // ZEROONE <- IMPURE NUMERIC x IMPURE NUMERIC
#    if t4 is false goto 7
#    goto 5
#
# 5: if c1 is not numeric goto error3 // <- STRING
#    x5 = 0 + i3		// NUMERIC <- ZERO + IMPURE NUMERIC
#    goto 6
#
# 6: c6 := impure numeric(c1)	// IMPURE NUMERIC <- STRING
#    i6 := i3 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
# 7: x7 := phi(0{4}, x5{10})	// NUMERIC <- ZERO x NUMERIC
#    return x7			// <- NUMERIC
#
# 8: i8 := phi(i6{6}, i9{9})    // NUMERIC <- NUMERIC x NUMERIC
#    x8 := phi(a1{6}, x11{11})	// NUMERIC <- ZERO x NUMERIC
#    t8 := (i8 < b4)            // ZEROONE <- NUMERIC x IMPURE NUMERIC
#    if t8 is false goto 7
#    goto 9
#
# 9: x9 := x8 + i6a		// NUMERIC <- NUMERIC x NUMERIC
#    i9 := i8 + c6		// NUMERIC <- IMPURE NUMERIC x IMPURE NUMERIC
#    goto 8
#
#-----------------------------------------------------------------------------
#
# A tightly optimized inner loop has emerged, once constant branches and
# do-nothing type conversions have been eliminated. All the pesky string
# representations are gone from the intermediate results. A few odds and
# ends of the first trip through the loop have distributed themselves
# elsewhere in the code.
#
# Now, how is this basic block splitting
#
# Essentially, it's a copying operation. We remove the 'jump'
# instruction at the end of each predecessor of the block being split,
# and replace it with the body of the split block. (All the predecessor blocks
# are single-exit, because we've done critical edge splitting.)
#
# We replace the phi instructions in the block with copies from the
# source operands. We track all the variables defined in the split
# code, since they now have multiple assignments to them, violating
# SSA. We add uses to the du-chains of all the variables used in the
# split code.
#
# Once the splitting is done, the first thing that we have to do is to
# make sure that there are no critical edges. Critical edges will have
# been introduced any time that we split a block that has multiple
# exits. We remove them by introducing a new, empty, basic block on each
# of the exits from the new block.
#
# The original block is now unreachable code. All variable uses
# in it are discarded, the block itself is emptied, and 'bbpred' is
# cleansed of references to it, both from it to its predecessors and from
# its successors to it.
#
# These manipulations have destroyed the dominance hierarchy, and this
# must be repaired. Currently, it is rebuilt wholesale. It would most
# likely be possible to constrain the rebuilding, using the ideas in
#
# Vugranam C. Sreedhar, Guang R. Gao and Yong-fong Lee. "Incremental
# Computation of Dominator Trees." ACM Trans. Progrm. Languages and
# Systems, 1995, pp. 1-12.
# http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.46.3846
#
# It might also be possible to avoid use of the dominator tree during
# this operation - see Section 5.4 of "SSA-based Compiler Design"
# (http://ssabook.gforge.inria.fr/latest/book.pdf) for how this might
# be carried out.
#
# We now have a program where the control flows are again correct, and
# the dominance relations are understood. The program is,
# nevertheless, not in SSA form. The violation of SSA is well
# characterized. We have available to us a set of variables that are
# assigned at multiple places, and the places where they are
# assigned. We can use a 'black box' SSA reconstruction algorithm on
# those variables to repair SSA.
#
# Finally, type analysis has to be repeated. Once again, this is
# currently done wholesale. It might be possible to constrain it to
# the newly-split variables and their phi-webs.
# There are algorithms known for repair of the dominance relations as
# edges are cut and spliced. This implementation more or less follows
# Vugranam C. Sreedhar, Guang R Gao, and Yong-Fong Lee; "Incremental
# Computation of Dominator Trees." Proc. ACM SIGPLAN Workshop on
# Intermediate Representations. San Francisco, Calif.:ACM, 1995,
# pp. 1-12.
#
# You have been warned.
#
#-----------------------------------------------------------------------------

# quadcode::transformer method insertSplitMarkers --
#
#	Inserts markers into the code indicating the original basic
#	blocks prior to any node splitting.
#
# Results:
#	None.
#
# Side effects:
#	Every basic block has a 'split' instruction added at its head.
#	The 'split' has no result, and its sole source operand is a literal
#	giving the original basic block number.

oo::define quadcode::transformer method insertSplitMarkers {} {
    set b -1
    foreach bb $bbcontent {
	incr b
	lset bbcontent $b {}
	set pc -1
	foreach q $bb {
	    incr pc
	    if {[lindex $q 0 0] ni {"entry" "phi" "param"}} {
		break
	    }
	}
	lset bbcontent $b [linsert $bb[set bb ""] $pc \
			       [list split {} [list literal $b]]]
    }

    return
}

# quadcode::transformer method countSplits --
#
#	Walks through the quadcode, counting the number of split markers
#	present for each original location in the code.
#
# Parameters:
#	None.
#
# Results:
#	Returns a list, indexed by the original basic block number,
#	where the values are the number of copies of the original block that
#	are present. Zero counts are possible and expected if the original
#	code has been optimized away entirely.

oo::define quadcode::transformer method countSplits {} {
    set r {}
    set b -1
    foreach bb $bbcontent {
	incr b
	foreach q $bb {
	    if {[lindex $q 0 0] eq "split"} {
		set origb [lindex $q 2 1]
		while {$origb >= [llength $r]} {
		    lappend r 0
		}
		set n [lindex $r $origb]
		incr n
		lset r $origb $n
	    }
	}
    }

    return $r
}

# quadcode::transformer method removeSplitMarkers --
#
#	Removes markers that indicate the source of code when code splitting
#
# Results:
#	None
#
# Side effects:
#	Split markers are removed.

oo::define quadcode::transformer method removeSplitMarkers {} {
    set b -1
    foreach bb $bbcontent {
	incr b
	set newbb {}
	foreach q $bb {
	    if {[lindex $q 0] ne "split"} {
		lappend newbb $q
	    }
	}
	lset bbcontent $b $newbb
    }

    return
}

# quadcode::transformer method nodesplit --
#
#	Attempts to factor type checking out of loops by splitting a
#	single node of the flow graph.
#
# Results:
#	Return 1 if a node was split, 0 if no splittable node was found.
#
# Side effects:
#	Major surgery on the program, that requires cleanup optimization
#	and type inference (including possible repairs to interprocedural
#	type inference).

oo::define quadcode::transformer method nodesplit {} {

    #    global splitcount
    #    if {[incr splitcount] > 10} {puts "$splitcount splits"; return 0}

    my debug-nodesplit {
	puts "Before node splitting:"
	my dump-bb
    }

    # Determine how many copies of each original basic block are present
    set ns_counters [my countSplits]

    # Walk the basic blocks in reverse order and try to find the longest
    # possible dominator chain for jump threading.
    for {set b [expr {[llength $bbcontent] - 1}]} {$b >= 0} {incr b -1} {
	set splitb [my ns_checksForBB $b]
	if {$splitb ne {}} {
	    if {[my ns_splittable $splitb]} {
		# Split a single basic block
		my ns_cloneBB $splitb
		my debug-nodesplit {
		    puts "After splitting:"
		    my dump-bb
		}
		my debug-audit {
		    my audit-duchain "nodesplit"
		    my audit-phis "nodesplit"
		}
		return 1
	    }
	}
    }

    # Found nothing to split

    return 0
}

# quadcode::transformer method ns_checksForBB --
#
#	Characterizes a basic block according to what tests in it
#	might benefit from jump threading.
#
# Parameters:
#	b - Basic block to examine

oo::define quadcode::transformer method ns_checksForBB {b} {

    set checks {}
    foreach q [lindex $bbcontent $b] {
	my ns_checksForQuad checks $q
    }

    set splitLevel Inf
    set splitb {}
    dict for {v typedict} $checks {
	set vt [typeOfOperand $types $v]
	lassign [my findDef $v] db dpc dq
	if {[lindex $dq 0] eq "phi"} {
	    my debug-nodesplit {
		puts "$v comes from $db:$dpc: $dq"
		puts "Can $dq be split profitably?"
		set pptypes [lmap t [dict keys $typedict] {
		    list $t ([nameOfType $t])
		}]
		puts "typedict = $pptypes"
	    }
	    set split 0
	    dict for {t -} $typedict {
		foreach {from sourceVar} [lrange $dq 2 end] {
		    if {[quadcode::dataType::isa \
			     [typeOfOperand $types $sourceVar] $t]
			&& ![quadcode::dataType::isa $vt $t]} {
			my debug-nodesplit {
			    puts "Yes, because $sourceVar must be\
			          [nameOfType $t] but $v might not be"
			}
			set split 1
			break
		    } elseif {([typeOfOperand $types $sourceVar] & $t) == 0} {
			my debug-nodesplit {
			    puts "Yes, because $sourceVar cannot be\
			          [nameOfType $t] but $v might be"
			}
			set split 1
			break
		    }
		}
		if {$split} break
	    }
	    if {$split} {
		set lev [lindex $bblevel $db]
		my debug-nodesplit {
		    puts "Found a split, block $db at level $lev"
		}
		if {$lev < $splitLevel} {
		    set splitb $db
		    set splitLevel $lev
		}
	    } else {
		my debug-nodesplit {
		    puts "No benefit to splitting $db:$dpc: $dq"
		}
	    }
	}
    }
    if {$splitLevel != Inf} {
	my debug-nodesplit {
	    puts "Outermost phi to split is in block $splitb"
	}
    }
    return $splitb
}


# quadcode::transformer method ns_checksForQuad --
#
#	Characterizes a quadcode instruction according to whether
#	it could benefit from jump threading, and if so, what typechecks
#	are needed to thread it.
#
# Parameters:
#	q - Quadcode instruction

oo::define quadcode::transformer method ns_checksForQuad {checksVar q} {

    upvar 1 $checksVar checks

    namespace upvar ::quadcode::dataType \
	FAIL FAIL	IMPURE IMPURE	NEXIST NEXIST \
	CONST0 CONST0

    switch -exact [lindex $q 0 0] {

	purify {

#       add -       bitand -    bitnot -    bitor -     bitxor -
#       div -       eq -        expon -     ge -        gt -
#       land -      le -        lor -       lshift -    lt -
#       mod -       mult -      neq -       rshift -    sub -
#       uminus -    uplus -
#       dictIncr


	    foreach opd [lrange $q 2 end] {
		if {[lindex $opd 0] in {"var" "temp"}} {
		    dict set checks $opd $IMPURE {}
		}
	    }
	}

	exists -
	initIfNotExists {
	    set opd [lindex $q 2]
	    if {[lindex $opd 0] in {"var" "temp"}} {
		dict set checks $opd $NEXIST {}
	    }
	}

	jumpTrue -  jumpFalse {
	    set opd [lindex $q 2]
	    if {[lindex $opd 0] in {"var" "temp"}} {
 		dict set checks $opd $CONST0 {}
	    }
	}

	jumpMaybe - result - returnCode - returnOptions {
	    set opd [lindex $q 2]
	    if {[lindex $opd 0] in {"var" "temp"}} {
		dict set checks $opd $FAIL {}
	    }
	}

	narrowToType -
	instanceOf {
	    set type [lindex $q 0 1]
	    set opd [lindex $q 2]
	    if {[lindex $opd 0] in {"var" "temp"}} {
		dict set checks $opd $type {}
	    }
	}
    }
}

# quadcode::transformer method ns_splittable --
#
#	Determines whether a basic block is a candidate for splitting
#
# Parameters:
#	b - Basic block number of the block being examined
#
# Results:
#	Returns 1 if the block should be split, 0 otherwise.
#
# A basic block is splittable unless some quad in would have more than
# $bbSplitLimit copies after the split

oo::define quadcode::transformer method ns_splittable {b} {

    set bbSplitLimit 8

    set bb [lindex $bbcontent $b]

    # We will add one more instance of the block for every
    # predecessor, but delete one for the current instance
    set delta [dict size [lindex $bbpred $b]]
    incr delta -1

    set pc -1
    # Make sure that the block hasn't been split too often already
    while {[incr pc] < [llength $bb]} {
	set q [lindex $bb $pc]
	if {[lindex $q 0] eq "split"} {
	    set splitFrom [lindex $q 2 1]
	    set count [lindex $ns_counters $splitFrom]
	    if {$count + $delta > $bbSplitLimit} {
		my debug-nodesplit {
		    puts "$b:$pc: $splitFrom has been split too many\
                    	               times already."
		}
		return 0
	    }
	}
    }

    my debug-nodesplit {
	puts "split block $b: [llength $bb] quads and\
              [expr {$delta+1}] predecessors"
    }
    return 1
}

# quadcode::transformer method ns_cloneBB --
#
#	Clones a basic block that has been identified as a candidate
#	for splitting.
#
# Parameters:
#	b - Basic block number of the block being cloned
#
# Results:
#	None.
#
# Side effects:
#	Copies the block onto the end of its successors, and adds
#	edges, blocks for critical edge splitting, and phi nodes as
#	necessary to have the code in two places instead of one.
#
# The block must be multiple entry, therefore all its predecessors
# are single exit. That's the key concept that makes this splitting
# possible - that we can hoist the code up into the predecessors.

oo::define quadcode::transformer method ns_cloneBB {b} {
    my debug-nodesplit {
	puts "Cloning and eliminating basic block $b:"
	set bb [lindex $bbcontent $b]
	set pc -1
	foreach q $bb {
	    puts "  $b:[incr pc]: $q"
	}
	puts "------------------------------"
    }

    # dups is a three-level dictionary. The first key is the name of
    # a variable whose assignment has been duplicated. The second-level
    # key is the number of the basic block where it now appears, and
    # the content is the number of assignments to the variable that appear
    # in the block.

    set dups {}

    # Copy the block's instructions into its predecessors. Split critical
    # edges if the block has more than one exit.

    set ps [lindex $bbpred $b]
    dict for {pred -} $ps {
	my ns_rewriteBlockOntoPred dups $b $pred
    }

    # If the block had multiple successors, its successors are all
    # single entry and free of phi nodes. If it had a single successor,
    # then any phi's in the successor need to have it removed and the
    # clones inserted.

    set ss [my bbsucc $b]
    if {[llength $ss] eq 1} {
	my ns_fixupPhis [lindex $ss 0] $b $ps
    }

    # Unlink du-chains from the split block, and remove its
    # predecessor and successor links. Fixup bbidom, bbkids, and bblevel
    # to reflect the new state of the DJ graph.

    my ns_destroyBB $b
    my bbidom
    my bblevel

    # Repair SSA

    my debug-nodesplit {
	puts "Repair SSA after node splitting"
	dict for {v defs} $dups {
	    puts "   $v: [dict keys $defs] -> [dict keys [dict get $duchain $v]]"
	}
	my dump-bb
    }

    dict for {v defs} $dups {
	my repairSSAVariable $v $defs
    }

}

# quadcode::transformer method ns_rewriteBlockOnPred --
#
#	Copies the instructions of the body of a basic block
#	(which must be two-entry) onto the end of a predecessor,
#	renaming all assigned variables.
#
# Parameters:
#	dupsv - Name of a variable in the callers scope that holds a two-level
#               dictionary tracking the duplicated assignment statements. The
#	        first-level keys are basic block numbers; the second-level
#	        keys are basic blocks where the variables are assigned, and
#	        the values are counts of assignments within the blocks.
#	b - Number of the basic block to copy
#	pred - Number of the predecessor block
#
# Results:
#	None.
#
# Side effects:
#	Copies the code, and updates du-chains of variables appearing in
#	instructions being copied. Updates 'dupsv' with the locations of
#	variable definitions in the copies. Splits critical edges if
#	they are being introduced.

oo::define quadcode::transformer method ns_rewriteBlockOntoPred {dupsv b pred} {

    upvar 1 $dupsv dups

    # Look up the code we're copying and the code that we're appending to.

    set bb [lindex $bbcontent $b]
    set pb [lindex $bbcontent $pred]
    lset bbcontent $pred {}
    set pb [lreplace $pb[set pb {}] end end]

    my debug-nodesplit {
    	puts "Want to copy basic block $b into predecessor $pred"
    	set pc -1
    	foreach q $bb {
    	    puts "    $b:[incr pc]: $q"
    	}
    	puts " ------- "
    	set pc -1
    	foreach q $pb {
    	    puts "    $pred:[incr pc]: $q"
    	}
    	puts " +++++++ "
    }


    # Iterate through the instructions being copied
    set key [list bb $pred]
    set pc -1
    set critical 0
    foreach q $bb {
	incr pc

	set res [lindex $q 1]

	# If we're copying a phi, replace it with a copy from the appropriate
	# source variable.
	set op [lindex $q 0 0]
	switch -exact -- $op {
	    phi {
		set repvar [dict get $q $key]
		if {$repvar eq "Nothing"} {
		    set q [list unset $res]
		} else {
		    set q [list copy $res $repvar]
		}
	    }
	}

	# Examine the result of the operation. If the result is a value,
	# add the copy to the dictionary of multiple assignments that will
	# need SSA repair. If the operation is a conditional jump, peform
	# critical edge splitting. On any jump, update the 'bbpred' link.
	switch -exact -- [lindex $res 0] {
	    "temp" - "var" {
		if {[dict exists $dups $res]} {
		    set d [dict get $dups $res]
		    dict set dups $res {}
		} else {
		    set d {}
		}
		dict incr d $pred
		dict set dups $res $d
	    }
	    "bb" {
		if {[lindex $q 0] ne "jump"} {
		    set critical 1
		}
		if {$critical} {
		    set tgt [my makeEmptyBB [lindex $res 1]]
		    my debug-nodesplit {
			puts "       (* $tgt:0: [list jump $res] *)"
		    }
		    set res [list bb $tgt]
		    lset q 1 $res
		}
		my bblink $pred [lindex $res 1]
	    }
	}

	# Add du-chaining for the variable references in the copied code
	foreach opd [lrange $q 2 end] {
	    if {[lindex $opd 0] in {"temp" "var"}} {
		my addUse $opd $pred
	    }
	}

	# Copy the quad to its new home
	my debug-nodesplit {
	    puts "    $pred:[llength $pb]: $q"
	}
	lappend pb $q
    }

    lset bbcontent $pred $pb
    return
}

# ns_fixupPhis --
#
#	Repairs phi operations in a successor block after splitting a
#	basic block.
#
# Parameters:
#	b - Successor block
#	orig - Original block that was split
#	clones - Blocks into which the code was cloned.
#
# Results:
#	None.
#
# Side effects:
#	Phi operations are rewritten

oo::define quadcode::transformer method ns_fixupPhis {b orig clones} {
    my debug-nodesplit {
	puts "  redirect phi operations in $b that refer to $orig to use\
	        [dict keys $clones] instead"
    }

    set bb [lindex $bbcontent $b]
    lset bbcontent $b {}
    set key [list bb $orig]
    set repls [lmap {c -} $clones {list bb $c}]
    set pc -1
    foreach q $bb {
	if {[lindex $q 0] ne "phi"} {
	    break
	}
	incr pc
	set newq {}
	dict for {from fromvar} $q {
	    if {$from ne $key} {
		lappend newq $from $fromvar
	    } else {
		foreach newfrom $repls {
		    lappend newq $newfrom $fromvar
		}
	    }
	}
	my debug-nodesplit {
	    puts "    $b:$pc: redirected: $newq"
	}
	lset bb $pc $newq
    }
    lset bbcontent $b $bb

    return
}

# ns_destroyBB --
#
#	Destroys a basic block after node splitting has replaced it with
#	one or more new instances.
#
# Parameters:
#	b - Basic block number to be destroyed
#
# Results:
#	None.
#
# Side effects:
#	All values defined in the block have their du-chains unlinked.
#	The block is erased from 'bbcontent' and 'bbpred'.
#
# The next time that 'deadcode' runs, the last vestiges of the block,
# which is now unreachable, will be eliminated entirely.

oo::define quadcode::transformer method ns_destroyBB {b} {
    # Unlink du-chains
    set bb [lindex $bbcontent $b]
    set key [list bb $b]
    foreach q $bb {
	foreach opd [lrange $q 2 end] {
	    if {[lindex $opd 0] in {"var" "temp"}} {
		my removeUse $opd $b
	    }
	}
    }

    # Unlink predecessor relationships
    foreach s [my bbsucc $b] {
	set ps [lindex $bbpred $s]
	lset bbpred $s {}
	dict unset ps $b
	lset bbpred $s $ps
    }

    # Remove the block from 'bbpred' and 'bbcontent'
    set preds [lindex $bbpred $b]
    lset bbpred $b {}
    lset bbcontent $b {}

}
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<




































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Deleted quadcode/renameTemps.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# renameTemps.tcl --
#
#	Renames temporaries in a quadcode program so as to avoid interferences.
#
# Copyright (c) 2017 by Kevin B. Kenny
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#------------------------------------------------------------------------------

package require struct::disjointset

# quadcode::transformer method renameTemps --
#
#	Renames temporaries in a quadcode sequence so as to avoid interferences.
#
# Results:
#	None.
#
# Side effects:
#	Temporaries are renamed by their equivalence classes.
#
# Temporaries that meet at phi operations represent the same temporary and
# must be named alike. If temporaries do not meet at phi operations, they
# do not need to have the same names.

oo::define quadcode::transformer method renameTemps {} {
    my debug-renameTemps {
	puts "Before renaming temporaries:"
	my dump-bb
    }


    # Put all the temporaries in a disjoint sets structure
    struct::disjointset [namespace current]::tempEquiv
    foreach bb $bbcontent {
	foreach q $bb {
	    set res [lindex $q 1]
	    if {[lindex $res 0] eq "temp"} {
		tempEquiv add-partition [list $res]
	    }
	}
    }

    # Unify all temporaries that flow through phis.
    foreach bb $bbcontent {
	foreach q $bb {
	    if {[lindex $q 0] eq "phi" && [lindex $q 1 0] eq "temp"} {
		set res [lindex $q 1]
		if {[lindex $res 0] eq "temp"} {
		    foreach {- src} [lrange $q 2 end] {
			if {[lindex $src 0] eq "temp"} {
			    tempEquiv merge $res $src
			}
		    }
		}
	    }
	}
    }

    # Come up with new labels for the temporaries
    set n 0
    set renamed {}
    foreach p [tempEquiv partitions] {
	set m -1
	foreach v $p {
	    dict set renamed $v [my newVarInstance [list temp $n]]
	    my debug-renameTemps {
		puts "Rename: $v [dict get $renamed $v]"
	    }
	}
	incr n
    }
    tempEquiv destroy

    # Relabel the quads

    set b -1
    foreach bb $bbcontent {
	incr b
	set pc -1
	foreach q $bb {
	    incr pc
	    set i 0
	    foreach arg [lrange $q 1 end] {
		incr i
		if {[dict exists $renamed $arg]} {
		    lset bbcontent $b $pc $i [dict get $renamed $arg]
		}
	    }
	}
    }

    my debug-renameTemps {
	puts "After renaming temporaries:"
	my dump-bb
    }

}
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<








































































































































































































Changes to quadcode/transformer.tcl.
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
	foreach pass {
	    bbpartition
	    constJumpPeephole
	    sortbb
	    loopinv
	    callFrameMotion
	    ssa
	    renameTemps
	    ud_du_chain
	    copyprop
	    fqcmd
	    varargs
	    deadbb
	    bbidom
	    bblevel







<







316
317
318
319
320
321
322

323
324
325
326
327
328
329
	foreach pass {
	    bbpartition
	    constJumpPeephole
	    sortbb
	    loopinv
	    callFrameMotion
	    ssa

	    ud_du_chain
	    copyprop
	    fqcmd
	    varargs
	    deadbb
	    bbidom
	    bblevel