001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.ssa;
014    
015    import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI;
016    import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH;
017    import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
018    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
019    import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE;
020    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
021    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
022    import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
023    import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
024    import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode;
025    import static org.jikesrvm.compilers.opt.ir.Operators.PHI;
026    import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP;
027    
028    import java.lang.reflect.Constructor;
029    import java.util.ArrayList;
030    import java.util.Enumeration;
031    import java.util.HashMap;
032    import java.util.HashSet;
033    import java.util.Iterator;
034    
035    import org.jikesrvm.VM;
036    import org.jikesrvm.classloader.TypeReference;
037    import org.jikesrvm.compilers.opt.DefUse;
038    import org.jikesrvm.compilers.opt.OptOptions;
039    import org.jikesrvm.compilers.opt.OptimizingCompilerException;
040    import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph;
041    import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode;
042    import org.jikesrvm.compilers.opt.controlflow.DominatorTree;
043    import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase;
044    import org.jikesrvm.compilers.opt.controlflow.LSTGraph;
045    import org.jikesrvm.compilers.opt.controlflow.LTDominators;
046    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
047    import org.jikesrvm.compilers.opt.ir.BBend;
048    import org.jikesrvm.compilers.opt.ir.BasicBlock;
049    import org.jikesrvm.compilers.opt.ir.Binary;
050    import org.jikesrvm.compilers.opt.ir.BoundsCheck;
051    import org.jikesrvm.compilers.opt.ir.Goto;
052    import org.jikesrvm.compilers.opt.ir.GuardedUnary;
053    import org.jikesrvm.compilers.opt.ir.IR;
054    import org.jikesrvm.compilers.opt.ir.IREnumeration;
055    import org.jikesrvm.compilers.opt.ir.IfCmp;
056    import org.jikesrvm.compilers.opt.ir.Instruction;
057    import org.jikesrvm.compilers.opt.ir.Label;
058    import org.jikesrvm.compilers.opt.ir.Move;
059    import org.jikesrvm.compilers.opt.ir.NullCheck;
060    import org.jikesrvm.compilers.opt.ir.Phi;
061    import org.jikesrvm.compilers.opt.ir.Register;
062    import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand;
063    import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
064    import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
065    import org.jikesrvm.compilers.opt.ir.operand.HeapOperand;
066    import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
067    import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand;
068    import org.jikesrvm.compilers.opt.ir.operand.Operand;
069    import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
070    import org.jikesrvm.compilers.opt.util.GraphNode;
071    
072    /**
073     * This optimisation works from the outer most loop inward, optimising
074     * loops that conform to being regular {@link
075     * AnnotatedLSTNode}s. The optimisations performs the following
076     * operations:
077     * <ul>
078     * <li>
079     * 1) Determine the bound and null checks to be eliminated. These are
080     * the ones that operate on the loop iterator. If no bound checks can
081     * be eliminated, stop optimising this loop.
082     * <li>
083     * 2) Determine the registers defined in the loop.
084     * <li>
085     * 3) Generate phi nodes that define the original register defined by
086     * the loop and use two newly created registers.
087     * <li>
088     * 4) Create a version of the original loop that uses the first of the
089     * newly created registers instead of the original registers.
090     * <li>
091     * 5) Create a second version, this time with the result of the
092     * eliminated checks set to true.
093     * <li>
094     * 6) Work out what the maximum value for all the bounds checks are
095     * and create branches to optimal or suboptimal loops
096     * <li>
097     * 7) Fix up the phi node predecessors
098     * <li>
099     * 8) Remove the unoptimized loop if its redundant
100     * <li>
101     * 9) Replace register definitions in the original loop with phi
102     * instructions
103     * <li>
104     * 10) Compact node numbering so that CFG number of nodes reflects
105     * that some basic blocks may have been deleted
106     * </ul>
107     * <p>
108     * Example:
109     * <listing>
110     *   for (int t1=0; t1 &lt; 100; t1++) {
111     *      g1 = null_check   l0
112     *      g2 = bounds_check l0, t1
113     *      g3 = guard_combine g1,g2
114     *      t2 = aload l0, t1, g3
115     *      g4 = null_check   l1
116     *      g5 = bounds_check l1, t1
117     *      g6 = guard_combine g4,g5
118     *           astore t2, l1, t1, g6
119     *   }
120     * </listing>
121     *
122     * becomes:
123     *
124     * <listing>
125     *   goto explicit_test_block
126     * successor_to_loops:
127     *   g1 = phi g1_1, g1_2
128     *   g2 = phi g2_1, g2_2
129     *   g3 = phi g3_1, g3_2
130     *   t2 = phi t2_1, t2_2
131     *   g4 = phi g4_1, g4_2
132     *   g5 = phi g5_1, g5_2
133     *   g6 = phi g6_1, g6_2
134     *   goto after_loop
135     * explicit_test_block:
136     *   if l0 == null (unlikely) goto sub_optimal_loop
137     *   if 100 >= l0.length (unlikely) goto sub_optimal_loop
138     *   if l1 == null (unlikely) goto sub_optimal_loop
139     *   if 100 >= l1.length (unlikely) goto sub_optimal_loop
140     *   goto optimal_loop
141     * sub_optimal_loop:
142     *   for (int t1_1=0; t1_1 &lt; 100; t1_1++) {
143     *      g1_1 = null_check   l0
144     *      g2_1 = bounds_check l0, t1_1
145     *      g3_1 = guard_combine g1_1,g2_1
146     *      t2_1 = aload l0, t1_1, g3_1
147     *      g4_1 = null_check   l1
148     *      g5_1 = bounds_check l1, t1_1
149     *      g6_1 = guard_combine g4_1,g5_1
150     *             astore t2_1, l1, t1_1, g6_1
151     *   }
152     *   goto successor_to_loops
153     * optimal_loop:
154     *   for (int t1_2=0; t1_2 &lt; 100; t1_2++) {
155     *      g1_2 = true_guard
156     *      g2_2 = true_guard
157     *      g3_2 = guard_combine g1_2,g2_2
158     *      t2_2 = aload l0, t1_2, g3_2
159     *      g4_2 = null_check   l1
160     *      g5_2 = bounds_check l1, t1_2
161     *      g6_2 = guard_combine g4_2,g5_2
162     *             astore t2_2, l1, t1_2, g6_2
163     *   }
164     *   goto successor_to_loops
165     * after_loop:
166     * </listing>
167     *
168     * The optimisation works on the Heap SSA form. A more accurate
169     * example of the transformation would be:
170     *
171     * <listing>
172     *   heap1 = ...; // previous heap state
173     *   t1_1 = 0;
174     *   if t1_1 &ge; 100 goto label2
175     *   label1:
176     *      t1_2 = phi t1_1, t1_3
177     *      heap2 = phi heap1, heap3
178     *      g1 = null_check   l0
179     *      g2 = bounds_check l0, t1_2
180     *      g3 = guard_combine g1,g2
181     *      t2 = aload l0, t1_2, g3
182     *      g4 = null_check   l1
183     *      g5 = bounds_check l1, t1_2
184     *      g6 = guard_combine g4,g5
185     *      heap3 = astore t2, l1, t1_2, g6
186     *      t1_3 = t1_2 + 1
187     *      if t1_3 &lt; 100 label1 *   label2:
188     * </listing>
189     *
190     * becomes:
191     *
192     * <listing>
193     *   heap1 = ...; // previous heap state
194     *   t1_1 = 0;
195     *   if t1_1 &ge; 100 goto label2
196     *   goto explicit_test_block
197     * successor_to_loops:
198     *   t1_2 = phi t1_2_1, t1_2_2
199     *   heap2 = phi heap2_1, heap2_2
200     *   g1 = phi g1_1, g1_2
201     *   g2 = phi g2_1, g2_2
202     *   g3 = phi g3_1, g3_2
203     *   t2 = phi t2_1, t2_2
204     *   g4 = phi g4_1, g4_2
205     *   g5 = phi g5_1, g5_2
206     *   g6 = phi g6_1, g6_2
207     *   heap3 = phi heap3_1, heap3_2
208     *   t1_3 = phi t1_3_1, t1_3_2
209     *   goto after_loop
210     * explicit_test_block:
211     *   g1_2 = if l0 == null (unlikely) goto sub_optimal_loop
212     *   g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop
213     *   g4_2 = if l1 == null (unlikely) goto sub_optimal_loop
214     *   g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop
215     *   goto optimal_loop
216     * sub_optimal_loop:
217     *   label1_1:
218     *      t1_2_1 = phi t1_1, t1_3_1
219     *      heap2_1 = phi heap1, heap3_1
220     *      g1_1 = null_check   l0
221     *      g2_1 = bounds_check l0, t1_2_1
222     *      g3_1 = guard_combine g1_1,g2_1
223     *      t2_1 = aload l0, t1_2_1, g3_1
224     *      g4_1 = null_check   l1
225     *      g5_1 = bounds_check l1, t1_2_1
226     *      g6_1 = guard_combine g4_1,g5_1
227     *      heap3_1 = astore t2_1, l1, t1_2_1, g6_1
228     *      t1_3_1 = t1_2_1 + 1
229     *      if t1_3_1 &lt; 100 label1_1
230     *   goto successor_to_loops
231     * optimal_loop:
232     *   label1_2:
233     *      t1_2_2 = phi t1_1, t1_3_2
234     *      heap2_2 = phi heap1, heap3_2
235     *      g3_2 = guard_combine g1_2,g2_2
236     *      t2_2 = aload l0, t1_2_2, g3_2
237     *      g6_2 = guard_combine g4_2,g5_2
238     *      heap3_2 = astore t2_2, l1, t1_2_2, g6_2
239     *      t1_3_2 = t1_2_2 + 1
240     *      if t1_3_2 &lt; 100 label1_2
241     *   goto successor_to_loops
242     * after_loop:
243     * label2:
244     * </listing>
245     */
246    public final class LoopVersioning extends CompilerPhase {
247      // -oO Debug variables Oo-
248      /**
249       * Flag to optionally print verbose debugging messages
250       */
251      private static final boolean DEBUG = false;
252      /**
253       * Flag to verify computed IR
254       */
255      private static final boolean VERIFY = false;
256    
257      // -oO Debug routines Oo-
258      /**
259       * Human readable report of what goes on
260       *
261       * @param s String to print
262       **/
263      private static void report(String s) {
264        if (DEBUG) {
265          VM.sysWriteln(s);
266        }
267      }
268    
269      /**
270       * Return a string name for this phase.
271       * @return "Loop Versioning"
272       */
273      @Override
274      public String getName() {
275        return "Loop Versioning";
276      }
277    
278      // -oO Variables used throughout the optimisation phase Oo-
279      /**
280       * The phi instruction operand holding the optimized loop variable
281       */
282      private static final int OPTIMIZED_LOOP_OPERAND = 0;
283      /**
284       * The phi instruction operand holding the unoptimized loop variable
285       */
286      private static final int UNOPTIMIZED_LOOP_OPERAND = 1;
287    
288      /**
289       * IR for optimisation
290       */
291      private IR ir;
292    
293      /**
294       * Set used to store the loop related register
295       */
296      private HashSet<Register> loopRegisterSet;
297    
298      /**
299       * SSA options
300       */
301      private SSAOptions desiredSSAOptions;
302      /**
303       * Compiler phases called from this one
304       */
305      private CompilerPhase enterSSA, leaveSSA, domPhase;
306      /**
307       * Run inside SSA sub-phase
308       */
309      private static final boolean inSSAphase = true;
310    
311      // -oO Interface to the rest of the compiler Oo-
312    
313      /**
314       * Constructor for this compiler phase
315       */
316      private static final Constructor<CompilerPhase> constructor =
317          getCompilerPhaseConstructor(LoopVersioning.class);
318    
319      /**
320       * Get a constructor object for this compiler phase
321       * @return compiler phase constructor
322       */
323      @Override
324      public Constructor<CompilerPhase> getClassConstructor() {
325        return constructor;
326      }
327    
328      /**
329       * Constructor
330       */
331      public LoopVersioning() {
332        desiredSSAOptions = new SSAOptions();
333        desiredSSAOptions.setScalarsOnly(true);
334        if (!inSSAphase) {
335          enterSSA = new EnterSSA();
336          leaveSSA = new LeaveSSA();
337        }
338        domPhase = new DominatorsPhase(false);
339      }
340    
341      /**
342       * Should loop versioning be performed?
343       */
344      @Override
345      public boolean shouldPerform(OptOptions options) {
346        return options.SSA_LOOP_VERSIONING;
347      }
348    
349      /**
350       * @param _ir the IR to process
351       */
352      @Override
353      public void perform(IR _ir) {
354        ir = _ir;
355    
356        // Create SSA
357        ir.desiredSSAOptions = desiredSSAOptions;
358        if (!inSSAphase) {
359          enterSSA.perform(ir);
360        }
361    
362        if (DEBUG) {
363          SSA.printInstructions(ir);
364        }
365    
366        // Perform loop annotation
367        if (!ir.hasReachableExceptionHandlers()) {
368          // Build LST tree and dominator info
369          domPhase.perform(ir);
370          DefUse.computeDU(ir);
371          // Build annotated version
372          ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree);
373        }
374        if (VERIFY) {
375          ir.verify(getName(), true);
376        }
377    
378        // Check loop annotation has been performed
379        if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) {
380          report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n");
381        } else {
382          loopRegisterSet = new HashSet<Register>();
383    
384          if (DEBUG) {
385            VM.sysWriteln(ir.getMethod().toString());
386            VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
387            SSA.printInstructions(ir);
388          }
389    
390          while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) {
391            if (DEBUG) {
392              VM.sysWriteln("Successful optimisation of " + ir.getMethod());
393              SSA.printInstructions(ir);
394              VM.sysWriteln(ir.cfg.toString());
395            }
396            // Get IR into shape for next pass
397            DefUse.computeDU(ir);
398            LTDominators.perform(ir, true, true);
399            ir.HIRInfo.dominatorTree = new DominatorTree(ir, true);
400            LSTGraph.perform(ir);
401            AnnotatedLSTGraph.perform(ir);
402    
403            if (VERIFY) {
404              ir.verify(getName(), true);
405            }
406    
407            if (DEBUG) {
408              VM.sysWriteln("after an optimization pass");
409              VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString());
410              SSA.printInstructions(ir);
411              VM.sysWriteln("Finish optimize: " + ir.getMethod().toString());
412            }
413          }
414          // No longer in use
415          loopRegisterSet = null;
416        }
417    
418        if (!inSSAphase) {
419          // Leave SSA
420          leaveSSA.perform(ir);
421        }
422    
423        if (VERIFY) {
424          ir.verify(getName(), true);
425        }
426      }
427    
428      // -oO Optimisation routines Oo-
429    
430      /**
431       * Find an outermost loop to optimise and optimise it. Focus on
432       * annotated regular loops, LICM should handle possible
433       * optimisation for the non-regular loops
434       *
435       * @param loop  Loop to search
436       * @return was optimisation performed
437       */
438      private boolean findLoopToOptimise(AnnotatedLSTNode loop) {
439        // Has this loop already been optimised?
440        Operand carriedLoopIterator = loop.getCarriedLoopIterator();
441        if ((carriedLoopIterator instanceof RegisterOperand) &&
442            (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) {
443          return false;
444        }
445    
446        // Process inner loops first
447        Enumeration<GraphNode> innerLoops = loop.outNodes();
448        // Iterate over loops
449        while (innerLoops.hasMoreElements()) {
450          AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement();
451          // Try to optimise inner loops first
452          if (findLoopToOptimise(nestedLoop)) {
453            // Exit early if inner loop optimisation succeeded
454            return true;
455          }
456        }
457        // Don't try to optimise irregular loops
458        if (loop.isNonRegularLoop()) {
459          return false;
460        }
461        if (DEBUG) {
462          report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod());
463          VM.sysWriteln("dominator tree:");
464          VM.sysWriteln(ir.HIRInfo.dominatorTree.toString());
465        }
466    
467        // 1) Determine the bound and null checks to be eliminated. The
468        // bound checks are the ones that operate on the loop iterator. If
469        // no checks can be eliminated, stop optimising this loop.
470        ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>();
471        getListOfChecksToEliminate(loop, checksToEliminate);
472        if (checksToEliminate.isEmpty()) {
473          return false;
474        } else {
475          // We found instructions to eliminate
476          if (DEBUG) {
477            VM.sysWriteln("Loop being optimised:");
478            VM.sysWriteln(loop.toString());
479            VM.sysWriteln("Checks to eliminate:");
480            for (Instruction instruction : checksToEliminate) {
481              VM.sysWriteln(instruction.toString());
482            }
483          }
484          // 2) Determine the registers defined in the loop.
485          ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>();
486          ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>();
487          ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>();
488          getRegistersDefinedInLoop(loop,
489                                    registersDefinedInOriginalLoop,
490                                    typesOfRegistersDefinedInOriginalLoop,
491                                    definingInstructionsInOriginalLoop);
492          if (DEBUG) {
493            VM.sysWrite("Registers in original loop:\n{");
494            for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) {
495              VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString());
496              if (definingInstructionsInOriginalLoop.get(i) != null) {
497                VM.sysWrite("(escapes),");
498              } else {
499                VM.sysWrite(",");
500              }
501            }
502            VM.sysWriteln("}");
503          }
504          // 3) Generate phi nodes that define the original register
505          // defined by the loop and use two newly created registers.
506          ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>();
507          HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>();
508          HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>();
509          generatePhiNodes(loop,
510                           registersDefinedInOriginalLoop,
511                           typesOfRegistersDefinedInOriginalLoop,
512                           phiInstructions,
513                           subOptimalRegMap,
514                           optimalRegMap);
515          if (DEBUG) {
516            VM.sysWriteln("subOptimalRegMap");
517            VM.sysWriteln(subOptimalRegMap.toString());
518            VM.sysWriteln("optimalRegMap");
519            VM.sysWriteln(optimalRegMap.toString());
520          }
521    
522          // 4) Create a version of the original loop that uses the first of
523          // the newly created registers instead of the original
524          // registers.
525          HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>();
526          HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap =
527              createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap);
528          if (DEBUG) {
529            VM.sysWriteln("subOptimalLoopMap");
530            VM.sysWriteln(unoptimizedLoopMap.toString());
531          }
532          // 5) Create a second version, this time with the result of the
533          // eliminated checks set to explicit test guards.
534          HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>();
535          HashMap<BasicBlock, BasicBlock> optimizedLoopMap =
536              createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap);
537          if (DEBUG) {
538            VM.sysWriteln("optimalLoopMap");
539            VM.sysWriteln(optimizedLoopMap.toString());
540          }
541          // 6) Work out what the maximum value for all the bounds checks
542          // are and create branches to optimal or suboptimal loops - with
543          // the unoptimized loop possibly being unreachable
544          BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
545          BasicBlock temp = (BasicBlock) loop.header.prev;
546          ir.cfg.breakCodeOrder(temp, loop.header);
547          ir.cfg.linkInCodeOrder(temp, firstBranchBlock);
548          ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header);
549          temp.redirectOuts(loop.header, firstBranchBlock, ir);
550          boolean isUnoptimizedLoopReachable =
551              createBranchBlocks(loop,
552                                 firstBranchBlock,
553                                 checksToEliminate,
554                                 unoptimizedLoopMap.get(loop.predecessor),
555                                 optimizedLoopMap.get(loop.predecessor),
556                                 optimalRegMap);
557          // 7) Fix up the phi node predecessors
558          fixUpPhiPredecessors(phiInstructions,
559                               isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null,
560                               optimizedLoopMap.get(loop.exit));
561          // 8) Remove the unoptimized loop if its redundant
562          if (!isUnoptimizedLoopReachable) {
563            removeUnoptimizedLoop(loop, unoptimizedLoopMap);
564          }
565    
566          // 9) Replace register definitions in the original
567          // loop with phi instructions
568          modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap);
569          // 10) Compact node numbering so that CFG number of nodes
570          // reflects that some basic blocks may have been deleted
571          ir.cfg.compactNodeNumbering();
572          return true;
573        }
574      }
575    
576      /**
577       * Create a list of instructions to be eliminated
578       * @param loop the loop to examine
579       * @param instrToEliminate the instructions to remove
580       */
581      private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) {
582        ArrayList<Instruction> nullChecks = new ArrayList<Instruction>();
583        ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>();
584        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
585        while (blocks.hasMoreElements()) {
586          BasicBlock block = blocks.nextElement();
587          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
588          while (instructions.hasMoreElements()) {
589            Instruction instruction = instructions.nextElement();
590            if (NullCheck.conforms(instruction)) {
591              if (loop.isInvariant(NullCheck.getRef(instruction))) {
592                instrToEliminate.add(instruction);
593                nullChecks.add(instruction);
594              }
595            } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) {
596              if (loop.isInvariant(BoundsCheck.getRef(instruction))) {
597                if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) {
598                  if (loop.isInvariant(BoundsCheck.getGuard(instruction))) {
599                    instrToEliminate.add(instruction);
600                  } else {
601                    // Null check isn't invariant but reference was, check
602                    // null check will be eliminated at end of loop
603                    oddBoundChecks.add(instruction);
604                  }
605                }
606              }
607            }
608          }
609        }
610        // Check cases where the null check isn't loop invariant, however,
611        // it will be in the optimized loop as we'll have eliminated it
612        for (Instruction oddBoundCheck : oddBoundChecks) {
613          Operand guard = BoundsCheck.getGuard(oddBoundCheck);
614          for (Instruction nullCheck : nullChecks) {
615            if (guard.similar(NullCheck.getGuardResult(nullCheck))) {
616              instrToEliminate.add(oddBoundCheck);
617              break;
618            }
619          }
620        }
621      }
622    
623      /**
624       * Get registers defined in the given loop. As we're in SSA form
625       * all register definitions must be unique.
626       * @param loop - the loop to examine
627       * @param registers - vector to which defined registers are added
628       */
629      private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers,
630                                             ArrayList<TypeReference> types,
631                                             ArrayList<Instruction> definingInstructions) {
632        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
633        while (blocks.hasMoreElements()) {
634          BasicBlock block = blocks.nextElement();
635          // can value escape
636          final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit));
637          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
638          while (instructions.hasMoreElements()) {
639            Instruction instruction = instructions.nextElement();
640            Enumeration<Operand> operands = instruction.getDefs();
641            while (operands.hasMoreElements()) {
642              Operand operand = operands.nextElement();
643              if (operand.isRegister()) {
644                registers.add(operand.asRegister().getRegister());
645                types.add(operand.asRegister().getType());
646                if (escapes) {
647                  definingInstructions.add(instruction);
648                } else {
649                  definingInstructions.add(null);
650                }
651              }
652            }
653          }
654        }
655      }
656    
657      /**
658       * Generate into a new block phi nodes that define the original
659       * register defined by the loop and use two newly created
660       * registers.
661       * @param registers - vector to which defined registers need to be
662       * created registers.x used in creating phi nodes
663       * @param types - vector of corresponding types of registers.
664       * @param phiInstructions - created phi instructions
665       * @param subOptimalRegMap - mapping of orignal destination to the
666       * newly created destination for the unoptimized loop
667       * @param optimalRegMap - mapping of orignal destination to the
668       * newly created destination for the optimized loop
669       */
670      private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers,
671                                    ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions,
672                                    HashMap<Register, Register> subOptimalRegMap,
673                                    HashMap<Register, Register> optimalRegMap) {
674        // Get the carried loop iterator's register
675        Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister();
676        for (int i = 0; i < registers.size(); i++) {
677          Register register = registers.get(i);
678          TypeReference type = types.get(i);
679          Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2);
680          phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
681    
682          // new operand for optimized loop
683          Operand op0 = ir.regpool.makeTemp(type);
684          Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0);
685          optimalRegMap.put(register, op0.asRegister().getRegister());
686    
687          // new operand for unoptimized loop
688          Operand op1 = ir.regpool.makeTemp(type);
689          Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1);
690          subOptimalRegMap.put(register, op1.asRegister().getRegister());
691    
692          // Add the new created carried loop iterator registers to
693          // internal set to mark the optimized loops
694          if (register == carriedLoopIteratorRegister) {
695            setOptimizedLoop(op0.asRegister().getRegister());
696            setOptimizedLoop(op1.asRegister().getRegister());
697          }
698    
699          phiInstructions.add(phi);
700        }
701        // rename any optimized inner loops registers
702        renameOptimizedLoops(subOptimalRegMap, optimalRegMap);
703      }
704    
705      /**
706       * Create a clone of the loop replacing definitions in the cloned
707       * loop with those found in the register map
708       * @param loop - loop to clone
709       * @param regMap - mapping of original definition to new
710       * definition
711       * @return a mapping from original BBs to created BBs
712       */
713      private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop,
714                                                                      HashMap<Register, Register> regMap,
715                                                                      HashMap<Register, BasicBlock> regToBlockMap) {
716        HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
717        // After the newly created loop goto the old loop header
718        originalToCloneBBMap.put(loop.successor, loop.header);
719        // Create an empty block to be the loop predecessor
720        BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
721        ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
722        originalToCloneBBMap.put(loop.predecessor, new_pred);
723        // Create copy blocks
724        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
725        while (blocks.hasMoreElements()) {
726          BasicBlock block = blocks.nextElement();
727          block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts
728          // Create copy and register mapping
729          BasicBlock copy = block.copyWithoutLinks(ir);
730          originalToCloneBBMap.put(block, copy);
731          // Link into code order
732          ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
733          // Alter register definitions and uses in copy
734          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
735          while (instructions.hasMoreElements()) {
736            Instruction instruction = instructions.nextElement();
737            Enumeration<Operand> operands = instruction.getDefs();
738            while (operands.hasMoreElements()) {
739              Operand operand = operands.nextElement();
740              if (operand.isRegister()) {
741                Register register = operand.asRegister().getRegister();
742                if (regMap.containsKey(register)) {
743                  instruction.replaceRegister(register, regMap.get(register));
744                  regToBlockMap.put(regMap.get(register), copy);
745                }
746              }
747            }
748            operands = instruction.getUses();
749            while (operands.hasMoreElements()) {
750              Operand operand = operands.nextElement();
751              if (operand instanceof RegisterOperand) {
752                Register register = operand.asRegister().getRegister();
753                if (regMap.containsKey(register)) {
754                  instruction.replaceRegister(register, regMap.get(register));
755                }
756              }
757            }
758          }
759        }
760        // Fix up outs
761        // loop predecessor
762        new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
763        // loop blocks
764        blocks = loop.getBasicBlocks();
765        while (blocks.hasMoreElements()) {
766          BasicBlock block = blocks.nextElement();
767          BasicBlock copy = originalToCloneBBMap.get(block);
768          Enumeration<BasicBlock> outs = block.getOutNodes();
769          while (outs.hasMoreElements()) {
770            BasicBlock out = outs.nextElement();
771            if (originalToCloneBBMap.containsKey(out)) {
772              copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
773            }
774          }
775        }
776        // Fix up phis
777        blocks = loop.getBasicBlocks();
778        while (blocks.hasMoreElements()) {
779          BasicBlock block = blocks.nextElement();
780          BasicBlock copy = originalToCloneBBMap.get(block);
781          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
782          while (instructions.hasMoreElements()) {
783            Instruction instruction = instructions.nextElement();
784            if (Phi.conforms(instruction)) {
785              for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
786                BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
787                if (originalToCloneBBMap.containsKey(phi_predecessor)) {
788                  Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
789                } else {
790                  dumpIR(ir, "Error when optimising" + ir.getMethod());
791                  throw new Error("There's > 1 route to this phi node " +
792                                  instruction +
793                                  " from outside the loop: " +
794                                  phi_predecessor);
795                }
796              }
797            }
798          }
799        }
800        return originalToCloneBBMap;
801      }
802    
803      /**
804       * Create a clone of the loop replacing definitions in the cloned
805       * loop with those found in the register map and eliminate
806       * unnecessary bound checks
807       * @param loop - loop to clone
808       * @param regMap - mapping of original definition to new
809       * definition
810       * @param instrToEliminate - instructions to eliminate
811       * @param regToBlockMap - mapping of a register to its defining BB
812       * @return a mapping from original BBs to created BBs
813       */
814      private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop,
815                                                                          HashMap<Register, Register> regMap,
816                                                                          ArrayList<Instruction> instrToEliminate,
817                                                                          HashMap<Register, BasicBlock> regToBlockMap) {
818        HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>();
819        // After the newly created loop goto the old loop header
820        originalToCloneBBMap.put(loop.successor, loop.header);
821        // Create an empty block to be the loop predecessor
822        BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
823        ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred);
824        originalToCloneBBMap.put(loop.predecessor, new_pred);
825    
826        // Create copy blocks
827        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
828        while (blocks.hasMoreElements()) {
829          BasicBlock block = blocks.nextElement();
830          // N.B. fall through will have been killed by unoptimized loop
831          // Create copy and register mapping
832          BasicBlock copy = block.copyWithoutLinks(ir);
833          originalToCloneBBMap.put(block, copy);
834          // Link into code order
835          ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy);
836    
837          // Alter register definitions in copy
838          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
839          loop_over_created_instructions:
840          while (instructions.hasMoreElements()) {
841            Instruction instruction = instructions.nextElement();
842            if (BoundsCheck.conforms(instruction)) {
843              for (Instruction anInstrToEliminate : instrToEliminate) {
844                if (instruction.similar(anInstrToEliminate)) {
845                  instruction.remove();
846                  continue loop_over_created_instructions;
847                }
848              }
849            } else if (NullCheck.conforms(instruction)) {
850              for (Instruction anInstrToEliminate : instrToEliminate) {
851                if (instruction.similar(anInstrToEliminate)) {
852                  instruction.remove();
853                  continue loop_over_created_instructions;
854                }
855              }
856            }
857            Enumeration<Operand> operands = instruction.getDefs();
858            while (operands.hasMoreElements()) {
859              Operand operand = operands.nextElement();
860              if (operand instanceof RegisterOperand) {
861                Register register = operand.asRegister().getRegister();
862                if (regMap.containsKey(register)) {
863                  instruction.replaceRegister(register, regMap.get(register));
864                  regToBlockMap.put(regMap.get(register), copy);
865                }
866              }
867            }
868            operands = instruction.getUses();
869            while (operands.hasMoreElements()) {
870              Operand operand = operands.nextElement();
871              if (operand.isRegister()) {
872                Register register = operand.asRegister().getRegister();
873                if (regMap.containsKey(register)) {
874                  instruction.replaceRegister(register, regMap.get(register));
875                }
876              }
877            }
878          }
879        }
880        // Fix up outs
881        // loop predecessor
882        new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir);
883        blocks = loop.getBasicBlocks();
884        while (blocks.hasMoreElements()) {
885          BasicBlock block = blocks.nextElement();
886          BasicBlock copy = originalToCloneBBMap.get(block);
887          Enumeration<BasicBlock> outs = block.getOutNodes();
888          while (outs.hasMoreElements()) {
889            BasicBlock out = outs.nextElement();
890            if (originalToCloneBBMap.containsKey(out)) {
891              copy.redirectOuts(out, originalToCloneBBMap.get(out), ir);
892            }
893          }
894        }
895        // Fix up phis
896        blocks = loop.getBasicBlocks();
897        while (blocks.hasMoreElements()) {
898          BasicBlock block = blocks.nextElement();
899          BasicBlock copy = originalToCloneBBMap.get(block);
900          IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy);
901          while (instructions.hasMoreElements()) {
902            Instruction instruction = instructions.nextElement();
903            if (Phi.conforms(instruction)) {
904              for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) {
905                BasicBlock phi_predecessor = Phi.getPred(instruction, i).block;
906                if (originalToCloneBBMap.containsKey(phi_predecessor)) {
907                  Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor)));
908                } else {
909                  throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor);
910                }
911              }
912            }
913          }
914        }
915        return originalToCloneBBMap;
916      }
917    
918      /**
919       * When phi nodes were generated the basic blocks weren't known for
920       * the predecessors, fix this up now. (It may also not be possible
921       * to reach the unoptimized loop any more)
922       */
923      private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit,
924                                        BasicBlock optimizedLoopExit) {
925        if (unoptimizedLoopExit != null) {
926          for (Instruction instruction : phiInstructions) {
927            Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
928            Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit));
929          }
930        } else {
931          for (Instruction instruction : phiInstructions) {
932            Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND);
933            Phi.resizeNumberOfPreds(instruction, 1);
934            Phi.resizeNumberOfValues(instruction, 1);
935            Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand);
936            Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit));
937          }
938        }
939      }
940    
941      /**
942       * Create the block containing explict branches to either the
943       * optimized or unoptimized loops
944       * @param optimalRegMap - mapping used to map eliminated bound and
945       * null check guards to
946       */
947      private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block,
948                                         ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry,
949                                         BasicBlock optimizedLoopEntry,
950                                         HashMap<Register, Register> optimalRegMap) {
951        BasicBlock blockOnEntry = block;
952        // 1) generate null check guards
953        block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry);
954    
955        // 2) generate bound check guards
956        if (loop.isMonotonic()) {
957          // create new operands for values beyond initial and terminal iterator values
958          Operand terminal;
959          Operand terminalLessStrideOnce;
960          Operand terminalPlusStrideOnce;
961    
962          // NB. precomputing these makes life easier and the code easier to read,
963          //     it does create dead code though
964          if (loop.terminalIteratorValue.isIntConstant()) {
965            terminal = loop.terminalIteratorValue;
966            int terminalAsInt = terminal.asIntConstant().value;
967            int stride = loop.strideValue.asIntConstant().value;
968            terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride);
969            terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride);
970          } else {
971            Instruction tempInstr;
972            terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue);
973            terminalLessStrideOnce = ir.regpool.makeTempInt();
974            terminalPlusStrideOnce = ir.regpool.makeTempInt();
975            tempInstr =
976                Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
977            tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
978            block.appendInstruction(tempInstr);
979            DefUse.updateDUForNewInstruction(tempInstr);
980    
981            tempInstr =
982                Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy());
983            tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
984            block.appendInstruction(tempInstr);
985            DefUse.updateDUForNewInstruction(tempInstr);
986          }
987    
988          // Determine maximum and minimum index values for different loop types
989          Operand phiMinIndexValue;
990          Operand phiMaxIndexValue;
991    
992          if (loop.isMonotonicIncreasing()) {
993            phiMinIndexValue = loop.initialIteratorValue;
994            if ((loop.condition.isLESS() ||
995                 loop.condition.isLOWER() ||
996                 loop.condition.isNOT_EQUAL())) {
997              phiMaxIndexValue = terminal;
998            } else if ((loop.condition.isLESS_EQUAL() ||
999                        loop.condition.isLOWER_EQUAL() ||
1000                        loop.condition.isEQUAL())) {
1001              phiMaxIndexValue = terminalPlusStrideOnce;
1002            } else {
1003              throw new Error("Unrecognised loop for fission " + loop);
1004            }
1005          } else if (loop.isMonotonicDecreasing()) {
1006            phiMaxIndexValue = loop.initialIteratorValue;
1007            if ((loop.condition.isGREATER() ||
1008                 loop.condition.isHIGHER() ||
1009                 loop.condition.isNOT_EQUAL())) {
1010              phiMinIndexValue = terminalPlusStrideOnce;
1011            } else if ((loop.condition.isGREATER_EQUAL() ||
1012                        loop.condition.isHIGHER_EQUAL() ||
1013                        loop.condition.isEQUAL())) {
1014              phiMinIndexValue = terminalLessStrideOnce;
1015            } else {
1016              throw new Error("Unrecognised loop for fission " + loop);
1017            }
1018          } else {
1019            throw new Error("Unrecognised loop for fission " + loop);
1020          }
1021          // Generate tests
1022          for (int i = 0; i < checksToEliminate.size(); i++) {
1023            Instruction instr = checksToEliminate.get(i);
1024            if (BoundsCheck.conforms(instr)) {
1025              // Have we already generated these tests?
1026              boolean alreadyChecked = false;
1027              for (int j = 0; j < i; j++) {
1028                Instruction old_instr = checksToEliminate.get(j);
1029                if (BoundsCheck.conforms(old_instr) &&
1030                    (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) &&
1031                    (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) {
1032                  // yes - just create a guard move
1033                  alreadyChecked = true;
1034                  RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO();
1035                  guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1036                  RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO();
1037                  guardSource.setRegister(optimalRegMap.get(guardSource.getRegister()));
1038                  Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource);
1039                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1040                  block.appendInstruction(tempInstr);
1041                  break;
1042                }
1043              }
1044              if (!alreadyChecked) {
1045                // no - generate tests
1046                Operand index = BoundsCheck.getIndex(instr);
1047                int distance = loop.getFixedDistanceFromPhiIterator(index);
1048                if (distance == 0) {
1049                  block =
1050                      generateExplicitBoundCheck(instr,
1051                                                 phiMinIndexValue,
1052                                                 phiMaxIndexValue,
1053                                                 optimalRegMap,
1054                                                 block,
1055                                                 unoptimizedLoopEntry);
1056                } else {
1057                  Instruction tempInstr;
1058                  RegisterOperand minIndex = ir.regpool.makeTempInt();
1059                  RegisterOperand maxIndex = ir.regpool.makeTempInt();
1060    
1061                  tempInstr =
1062                      Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance));
1063                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1064                  block.appendInstruction(tempInstr);
1065                  DefUse.updateDUForNewInstruction(tempInstr);
1066    
1067                  tempInstr =
1068                      Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance));
1069                  tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1070                  block.appendInstruction(tempInstr);
1071                  DefUse.updateDUForNewInstruction(tempInstr);
1072    
1073                  block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry);
1074                }
1075              }
1076            }
1077          }
1078        }
1079        // Have we had to create a new basic block since entry => we
1080        // generated a branch to the unoptimized loop
1081        boolean isUnoptimizedLoopReachable = (blockOnEntry != block);
1082        // 3) Finish up with goto and generate true guard value
1083        {
1084          Instruction branch; // the generated branch instruction
1085          branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget());
1086          branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1087          block.appendInstruction(branch);
1088          block.deleteNormalOut();
1089          block.insertOut(optimizedLoopEntry);
1090        }
1091        return isUnoptimizedLoopReachable;
1092      }
1093    
1094      /**
1095       * Generate null check branch blocks
1096       *
1097       * @param loop the current loop where checks are being eliminated
1098       * @param checksToEliminate all of the checks that are being eliminated in the pass
1099       * @param optimalRegMap a map from original register to the register used in the optimal loop
1100       * @param block the block to generate code into
1101       * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1102       * @return the new block to generate code into
1103       */
1104      private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop,
1105                                                           ArrayList<Instruction> checksToEliminate,
1106                                                           HashMap<Register, Register> optimalRegMap,
1107                                                           BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1108        // Map of already generated null check references to their
1109        // corresponding guard result
1110        HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>();
1111        // Iterate over checks
1112        for (Instruction instr : checksToEliminate) {
1113          // Is this a null check
1114          if (NullCheck.conforms(instr)) {
1115            // the generated branch instruction
1116            Instruction branch;
1117            // the reference to compare
1118            Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr));
1119            // the guard result to define
1120            RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO();
1121            guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1122            // check if we've generated this test already
1123            if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) {
1124              // yes - generate just a guard move
1125              branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy());
1126              branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1127              block.appendInstruction(branch);
1128            } else {
1129              // check if we can just move a guard from the loop predecessors
1130              RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr);
1131              if (guard != null) {
1132                // yes - generate just a guard move
1133                branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO());
1134                branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1135                block.appendInstruction(branch);
1136              } else {
1137                // generate explicit null test
1138                branch =
1139                    IfCmp.create(REF_IFCMP,
1140                                 guardResult,
1141                                 ref.copy(),
1142                                 new NullConstantOperand(),
1143                                 ConditionOperand.EQUAL(),
1144                                 unoptimizedLoopEntry.makeJumpTarget(),
1145                                 BranchProfileOperand.unlikely());
1146                if (ref.isRegister()) {
1147                  refToGuardMap.put(ref.asRegister().getRegister(), guardResult);
1148                }
1149                branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1150                block.appendInstruction(branch);
1151                // Adjust block
1152                block.insertOut(unoptimizedLoopEntry);
1153                BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1154                BasicBlock temp = (BasicBlock) block.next;
1155                ir.cfg.breakCodeOrder(block, temp);
1156                ir.cfg.linkInCodeOrder(block, new_block);
1157                ir.cfg.linkInCodeOrder(new_block, temp);
1158                block.insertOut(new_block);
1159                block = new_block;
1160              }
1161            }
1162          }
1163        }
1164        return block;
1165      }
1166    
1167      /**
1168       * Generate bound check branch blocks
1169       *
1170       * @param boundCheckInstr the bound check instruction in question
1171       * @param minIndexValue the min value for an iterator a loop will generate
1172       * @param maxIndexValue the max value for an iterator a loop will generate
1173       * @param optimalRegMap a map from original register to the register used in the optimal loop
1174       * @param block the block to generate code into
1175       * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails
1176       * @return the new block to generate code into
1177       */
1178      private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue,
1179                                                        Operand maxIndexValue,
1180                                                        HashMap<Register, Register> optimalRegMap,
1181                                                        BasicBlock block, BasicBlock unoptimizedLoopEntry) {
1182        // 1) Work out what tests are necessary. NB we don't optimise for
1183        // the case when exceptions will always be generated
1184        boolean lowerBoundTestRedundant;
1185        boolean upperBoundTestRedundant;
1186        {
1187          // as array lengths must be >= 0 the lower bound test is not
1188          // necessary if:
1189          // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant)
1190          lowerBoundTestRedundant =
1191              ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) ||
1192               ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) &&
1193                (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0)));
1194          // as the upper bound must be <= arraylength the test is not
1195          // necessary if:
1196          // maxIndexValue = (arraylength A) - zeroOrPositiveConstant
1197          Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue);
1198          upperBoundTestRedundant =
1199              ((maxIndexArrayLengthRef != null) &&
1200               maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) &&
1201               (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0));
1202        }
1203    
1204        // 2) Create explicit bound check
1205    
1206        // register to hold result (NB it's a guard for the optimal loop)
1207        RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO();
1208        guardResult.setRegister(optimalRegMap.get(guardResult.getRegister()));
1209    
1210        // the guard on the bound check (mapped from the optimal loop as
1211        // it should already have been generated or may already be out of
1212        // the loop)
1213        Operand origGuard = BoundsCheck.getGuard(boundCheckInstr);
1214        Operand guard = origGuard.copy();
1215        if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) {
1216          guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister()));
1217        }
1218    
1219        if (lowerBoundTestRedundant && upperBoundTestRedundant) {
1220          // both tests redundant so just generate a guard move of the
1221          // bound check guard
1222          Instruction move = Move.create(GUARD_MOVE, guardResult, guard);
1223          move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1224          block.appendInstruction(move);
1225        } else {
1226          // 2.1) Create array length
1227          RegisterOperand array_length = ir.regpool.makeTempInt();
1228          Instruction array_length_instr =
1229              GuardedUnary.create(ARRAYLENGTH,
1230                                  array_length,
1231                                  AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(),
1232                                  guard);
1233          array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1234          block.appendInstruction(array_length_instr);
1235    
1236          // 2.2) Create minimum index test unless test is redundant
1237          if (!lowerBoundTestRedundant) {
1238            RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation();
1239            // Generate bound check
1240            Instruction branch =
1241                IfCmp.create(INT_IFCMP,
1242                             lowerBoundGuard,
1243                             minIndexValue.copy(),
1244                             array_length.copyRO(),
1245                             ConditionOperand.LESS(),
1246                             unoptimizedLoopEntry.makeJumpTarget(),
1247                             BranchProfileOperand.unlikely());
1248            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1249            block.appendInstruction(branch);
1250            // Adjust block
1251            block.insertOut(unoptimizedLoopEntry);
1252            BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1253            BasicBlock temp = (BasicBlock) block.next;
1254            ir.cfg.breakCodeOrder(block, temp);
1255            ir.cfg.linkInCodeOrder(block, new_block);
1256            ir.cfg.linkInCodeOrder(new_block, temp);
1257            block.insertOut(new_block);
1258            block = new_block;
1259          }
1260          // 2.3) Create maximum index test
1261          if (!upperBoundTestRedundant) {
1262            Instruction branch =
1263                IfCmp.create(INT_IFCMP,
1264                             guardResult,
1265                             maxIndexValue.copy(),
1266                             array_length.copyRO(),
1267                             ConditionOperand.GREATER(),
1268                             unoptimizedLoopEntry.makeJumpTarget(),
1269                             BranchProfileOperand.unlikely());
1270            branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1271            block.appendInstruction(branch);
1272            // Adjust block
1273            block.insertOut(unoptimizedLoopEntry);
1274            BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir);
1275            BasicBlock temp = (BasicBlock) block.next;
1276            ir.cfg.breakCodeOrder(block, temp);
1277            ir.cfg.linkInCodeOrder(block, new_block);
1278            ir.cfg.linkInCodeOrder(new_block, temp);
1279            block.insertOut(new_block);
1280            block = new_block;
1281          }
1282        }
1283        return block;
1284      }
1285    
1286      /**
1287       * Can we eliminate a null check as it has lready been performed?
1288       * NB SSA guarantees that if a value is null it must always be null
1289       *
1290       * @param instr null check instruction
1291       */
1292      private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) {
1293        if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr));
1294        BasicBlock block = header;
1295        do {
1296          block = ir.HIRInfo.dominatorTree.getParent(block);
1297          Instruction lastInst = block.lastInstruction();
1298          for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst =
1299              itrInst.nextInstructionInCodeOrder()) {
1300            if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) {
1301              return NullCheck.getGuardResult(itrInst);
1302            }
1303          }
1304        } while (block != ir.cfg.entry());
1305        return null;
1306      }
1307    
1308      /**
1309       * Get the array length reference ignoring instructions that adjust
1310       * its result by a fixed amount
1311       *
1312       * @param op operand to chase arraylength opcode to
1313       * constant value from an array length
1314       */
1315      private Operand getConstantAdjustedArrayLengthRef(Operand op) {
1316        Operand result = null;
1317        if (op.isRegister()) {
1318          Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1319          if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1320            result = GuardedUnary.getVal(opInstr);
1321          } else if ((opInstr.operator.opcode == INT_ADD_opcode) || (opInstr.operator.opcode == INT_SUB_opcode)) {
1322            Operand val1 = Binary.getVal1(opInstr);
1323            Operand val2 = Binary.getVal2(opInstr);
1324            if (val1.isConstant()) {
1325              result = getConstantAdjustedArrayLengthRef(val2);
1326            } else if (val2.isConstant()) {
1327              result = getConstantAdjustedArrayLengthRef(val1);
1328            }
1329          }
1330        }
1331        return result;
1332      }
1333    
1334      /**
1335       * Get the distance from an array length by addding up instructions
1336       * that adjust the array length result by a constant amount
1337       *
1338       * @param op operand to chase arraylength opcode to
1339       */
1340      private int getConstantAdjustedArrayLengthDistance(Operand op) {
1341        Instruction opInstr = AnnotatedLSTNode.definingInstruction(op);
1342        if (opInstr.operator.opcode == ARRAYLENGTH_opcode) {
1343          return 0;
1344        } else if (opInstr.operator.opcode == INT_ADD_opcode) {
1345          Operand val1 = Binary.getVal1(opInstr);
1346          Operand val2 = Binary.getVal2(opInstr);
1347          if (val1.isConstant()) {
1348            return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2);
1349          } else {
1350            if (VM.VerifyAssertions) VM._assert(val2.isConstant());
1351            return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value;
1352          }
1353        } else if (opInstr.operator.opcode == INT_SUB_opcode) {
1354          Operand val1 = Binary.getVal1(opInstr);
1355          Operand val2 = Binary.getVal2(opInstr);
1356          if (val1.isConstant()) {
1357            return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2);
1358          } else {
1359            if (VM.VerifyAssertions) VM._assert(val2.isConstant());
1360            return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value;
1361          }
1362        } else {
1363          throw new Error("Unexpected opcode when computing distance " + op);
1364        }
1365      }
1366    
1367      /**
1368       * Remove loop and replace register definitions in the original loop
1369       * with phi instructions
1370       */
1371      private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions,
1372                                      ArrayList<Instruction> definingInstrInOriginalLoop,
1373                                      HashMap<Register, Register> subOptimalRegMap,
1374                                      HashMap<Register, Register> optimalRegMap) {
1375        // Remove instructions from loop header and exit, remove other
1376        // loop body blocks
1377        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
1378        while (blocks.hasMoreElements()) {
1379          BasicBlock block = blocks.nextElement();
1380          if ((block == loop.header) || (block == loop.exit)) {
1381            IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block);
1382            while (instructions.hasMoreElements()) {
1383              Instruction instruction = instructions.nextElement();
1384              if (!BBend.conforms(instruction) && !Label.conforms(instruction)) {
1385                instruction.remove();
1386              }
1387            }
1388          } else {
1389            ir.cfg.removeFromCFGAndCodeOrder(block);
1390          }
1391        }
1392    
1393        // Place phi instructions in loop header
1394        for (int i = 0; i < phiInstructions.size(); i++) {
1395          Instruction origInstr = definingInstrInOriginalLoop.get(i);
1396          // Did the original instructions value escape the loop?
1397          if (origInstr != null) {
1398            // Was this a phi of a phi?
1399            if (Phi.conforms(origInstr)) {
1400              Instruction phi = phiInstructions.get(i);
1401              boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2;
1402              // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header
1403              boolean fixed = false;
1404              for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) {
1405                BasicBlockOperand predOp = Phi.getPred(origInstr, index);
1406                // Only worry about values who are on the backward branch
1407                if (predOp.block == loop.exit) {
1408                  if (fixed) { // We've tried to do 2 replaces => something wrong
1409                    SSA.printInstructions(ir);
1410                    OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1411                                                                "Phi node in loop header with multiple in loop predecessors");
1412                  }
1413                  Operand rval = Phi.getValue(origInstr, index);
1414                  if (rval.isRegister()) {
1415                    // Sort out registers
1416                    Register origRegPhiRval = rval.asRegister().getRegister();
1417                    Register subOptRegPhiRval;
1418                    Register optRegPhiRval;
1419                    if (!subOptimalRegMap.containsKey(origRegPhiRval)) {
1420                      // Register comes from loop exit but it wasn't defined in the loop
1421                      subOptRegPhiRval = origRegPhiRval;
1422                      optRegPhiRval = origRegPhiRval;
1423                    } else {
1424                      subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval);
1425                      optRegPhiRval = optimalRegMap.get(origRegPhiRval);
1426                    }
1427                    if (phiHasUnoptimizedArg) {
1428                      Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval);
1429                    }
1430                    Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval);
1431                  } else if (rval.isConstant()) {
1432                    // Sort out constants
1433                    if (phiHasUnoptimizedArg) {
1434                      Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy());
1435                    }
1436                    Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy());
1437                  } else if (rval instanceof HeapOperand) {
1438                    // Sort out heap variables
1439                    @SuppressWarnings("unchecked") // Cast to generic type
1440                        HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value;
1441                    HeapVariable<Object> subOptPhiRval;
1442                    HeapVariable<Object> optPhiRval;
1443                    if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) {
1444                      // currently we only expect to optimise scalar SSA
1445                      // form
1446                      subOptPhiRval = origPhiRval;
1447                      optPhiRval = origPhiRval;
1448                    } else {
1449                      /*
1450                      subOptPhiRval   = (HeapVariable)subOptimalRegMap.get(origPhiRval);
1451                      optPhiRval      = (HeapVariable)optimalRegMap.get(origPhiRval);
1452                      */
1453                    }
1454                    if (phiHasUnoptimizedArg) {
1455                      Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval));
1456                    }
1457                    Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval));
1458                  } else {
1459                    OptimizingCompilerException.UNREACHABLE("LoopVersioning",
1460                                                                "Unknown operand type",
1461                                                                rval.toString());
1462                  }
1463                  fixed = true;
1464                }
1465              }
1466            }
1467            // Add back to loop
1468            loop.header.appendInstruction(phiInstructions.get(i));
1469          }
1470        }
1471        // Remove original loop and branch to loop successor
1472        Instruction tempInstr;
1473        if (loop.header != loop.exit) {
1474          tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget());
1475          tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1476          loop.header.appendInstruction(tempInstr);
1477          loop.header.deleteNormalOut();
1478          loop.header.insertOut(loop.exit);
1479    
1480        }
1481        tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget());
1482        tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI);
1483        loop.exit.appendInstruction(tempInstr);
1484        loop.exit.deleteNormalOut();
1485        loop.exit.insertOut(loop.successor);
1486      }
1487    
1488      /**
1489       * Remove unreachable unoptimized loop
1490       */
1491      private void removeUnoptimizedLoop(AnnotatedLSTNode loop,
1492                                         HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) {
1493        Enumeration<BasicBlock> blocks = loop.getBasicBlocks();
1494        report("removing unoptimized loop");
1495        BasicBlock block = unoptimizedLoopMap.get(loop.predecessor);
1496        report("removing block " + block);
1497        ir.cfg.removeFromCFGAndCodeOrder(block);
1498        while (blocks.hasMoreElements()) {
1499          block = unoptimizedLoopMap.get(blocks.nextElement());
1500          if (!loop.contains(block)) {
1501            report("removing block " + block);
1502            ir.cfg.removeFromCFGAndCodeOrder(block);
1503          } else {
1504            report("not removing block that's in the original loop" + block);
1505          }
1506        }
1507      }
1508    
1509      /**
1510       * Put the optimized loop's iterator register into the hash set
1511       *
1512       * @param reg register
1513       */
1514      private void setOptimizedLoop(Register reg) {
1515        loopRegisterSet.add(reg);
1516      }
1517    
1518      /**
1519       * Check whether the loop that contain such iterator register had
1520       * been optimized
1521       *
1522       * @param reg register
1523       * @return the test result
1524       */
1525      private boolean isOptimizedLoop(Register reg) {
1526        return loopRegisterSet.contains(reg);
1527      }
1528    
1529      /**
1530       * Rename the iterators for optimized loops so we can tell they are still optimized
1531       */
1532      private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap,
1533                                        HashMap<Register, Register> optimalRegMap) {
1534        Iterator<Register> itr = loopRegisterSet.iterator();
1535        while (itr.hasNext()) {
1536          Register reg = itr.next();
1537          if (subOptimalRegMap.containsKey(reg)) {
1538            loopRegisterSet.remove(reg);
1539            loopRegisterSet.add(subOptimalRegMap.get(reg));
1540            loopRegisterSet.add(optimalRegMap.get(reg));
1541            itr = loopRegisterSet.iterator();
1542          }
1543        }
1544      }
1545    }