001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.controlflow;
014    
015    import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode;
016    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
017    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO_opcode;
018    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD;
019    import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode;
020    import static org.jikesrvm.compilers.opt.ir.Operators.INT_AND;
021    import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP;
022    import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP_opcode;
023    import static org.jikesrvm.compilers.opt.ir.Operators.INT_MOVE;
024    import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB;
025    
026    import java.lang.reflect.Constructor;
027    import java.util.Enumeration;
028    
029    import org.jikesrvm.VM;
030    import org.jikesrvm.compilers.opt.DefUse;
031    import org.jikesrvm.compilers.opt.OptOptions;
032    import org.jikesrvm.compilers.opt.Simple;
033    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
034    import org.jikesrvm.compilers.opt.ir.BasicBlock;
035    import org.jikesrvm.compilers.opt.ir.Binary;
036    import org.jikesrvm.compilers.opt.ir.ExceptionHandlerBasicBlock;
037    import org.jikesrvm.compilers.opt.ir.Goto;
038    import org.jikesrvm.compilers.opt.ir.GuardResultCarrier;
039    import org.jikesrvm.compilers.opt.ir.GuardedUnary;
040    import org.jikesrvm.compilers.opt.ir.IR;
041    import org.jikesrvm.compilers.opt.ir.IfCmp;
042    import org.jikesrvm.compilers.opt.ir.Instruction;
043    import org.jikesrvm.compilers.opt.ir.Label;
044    import org.jikesrvm.compilers.opt.ir.Move;
045    import org.jikesrvm.compilers.opt.ir.Register;
046    import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand;
047    import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand;
048    import org.jikesrvm.compilers.opt.ir.operand.ConstantOperand;
049    import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand;
050    import org.jikesrvm.compilers.opt.ir.operand.Operand;
051    import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
052    import org.jikesrvm.compilers.opt.util.GraphNode;
053    import org.jikesrvm.util.BitVector;
054    
055    public class LoopUnrolling extends CompilerPhase {
056    
057      static final boolean DEBUG = false;
058      static final int MAX_BLOCKS_FOR_NAIVE_UNROLLING = 20;
059    
060      /**
061       * Returns the name of the phase.
062       */
063      @Override
064      public String getName() {
065        return "Loop Unrolling";
066      }
067    
068      /**
069       * Constructor for this compiler phase
070       */
071      private static final Constructor<CompilerPhase> constructor =
072          getCompilerPhaseConstructor(LoopUnrolling.class);
073    
074      /**
075       * Get a constructor object for this compiler phase
076       * @return compiler phase constructor
077       */
078      @Override
079      public Constructor<CompilerPhase> getClassConstructor() {
080        return constructor;
081      }
082    
083      /**
084       * This phase is disabled by default.
085       * <p>
086       * It will run only on O3 but O2 is the default maximum optimization level.
087       */
088      @Override
089      public boolean shouldPerform(OptOptions options) {
090        return ((options.getOptLevel() >= 3) && (options.CONTROL_UNROLL_LOG >= 1) && (!options.SSA_LOOP_VERSIONING));
091      }
092    
093      @Override
094      public void perform(IR ir) {
095        unrollFactor = (1 << ir.options.CONTROL_UNROLL_LOG);
096    
097        if (ir.hasReachableExceptionHandlers()) return;
098        DefUse.computeDU(ir);
099        new Simple(1, true, true, true, false).perform(ir);
100        new BranchOptimizations(-1, true, true).perform(ir, true);
101    
102        //new CFGTransformations().perform(ir);
103        // Note: the following unfactors the CFG
104        new DominatorsPhase(true).perform(ir);
105        DefUse.computeDU(ir);
106    
107        ir.setInstructionScratchWord(0);
108    
109        unrollLoops(ir);
110    
111        CFGTransformations.splitCriticalEdges(ir);
112        ir.cfg.compactNodeNumbering();
113      }
114    
115      /**
116       * unroll the loops in the given IR.
117       */
118      void unrollLoops(IR ir) {
119        LSTGraph lstg = ir.HIRInfo.loopStructureTree;
120    
121        for (int i = 1; lstg != null && i <= 1; ++i) {
122          unrollLoopTree((LSTNode) lstg.firstNode(), ir, i);
123          (new BuildLST()).perform(ir);
124        }
125      }
126    
127      /**
128       * loop unrolling on a given loop structure sub tree
129       * @param t
130       * @param ir
131       */
132      int unrollLoopTree(LSTNode t, IR ir, int target) {
133        int height = 1;
134        Enumeration<GraphNode> e = t.outNodes();
135        if (!e.hasMoreElements()) {
136          if (t.loop != null) {
137            report("Leaf loop in " + ir.method + ": " + t.loop + "\n");
138            // check infrequency
139            if (t.header.getInfrequent()) {
140              report("no unrolling of infrequent loop\n");
141            } else {
142              boolean doNaiveUnrolling = height == target && unrollLeaf(t, ir);
143              if (doNaiveUnrolling) naiveUnroller(t, ir);
144            }
145          }
146        } else {
147          while (e.hasMoreElements()) {
148            LSTNode n = (LSTNode) e.nextElement();
149            int heightOfTree = unrollLoopTree(n, ir, target);
150            height = Math.max(height, heightOfTree) + 1;
151          }
152        }
153        return height;
154      }
155    
156      static final int MaxInstructions = 100;
157      private int unrollFactor = 1;
158    
159      boolean unrollLeaf(LSTNode t, IR ir) {
160        int instructionsInLoop = 0;
161        BasicBlock exitBlock = null, backEdgeBlock = null, succBlock = null, predBlock = null;
162        BitVector nloop = t.loop;
163        BasicBlock header = t.header;
164        Instruction tmp;
165    
166        if (ir.hasReachableExceptionHandlers()) {
167          report("0 IR may have exception handlers\n");
168          return false;
169        }
170    
171        // determine loop structure by looking at its blocks
172        Enumeration<BasicBlock> loopBlocks = ir.getBasicBlocks(nloop);
173        int blocks = 0;
174        while (loopBlocks.hasMoreElements()) {
175          BasicBlock b = loopBlocks.nextElement();
176          blocks++;
177    
178          // check for size
179          instructionsInLoop += b.getNumberOfRealInstructions();
180          if (instructionsInLoop > MaxInstructions) {
181            report("1 is too big\n");
182            return false;
183          }
184    
185          // look at the in edges. We want the header to be the only
186          // block with out of loop incoming edges.
187          Enumeration<BasicBlock> e = b.getIn();
188          if (b != header) {
189            while (e.hasMoreElements()) {
190              BasicBlock o = e.nextElement();
191              if (!CFGTransformations.inLoop(o, nloop)) {
192                report("2 interior pointers.\n");
193                return true;
194              }
195            }
196          } else {
197            // check the headers predecessors: there should be
198            // one out of loop input and one backedge.
199            // We can extend this for loops with several backedges,
200            // if they all have the same conditions.
201            int inEdges = 0;
202            while (e.hasMoreElements()) {
203              inEdges++;
204              BasicBlock o = e.nextElement();
205              if (!CFGTransformations.inLoop(o, nloop)) {
206                if (predBlock == null) {
207                  predBlock = o;
208                } else {
209                  report("3 multi entry header.\n");
210                  return true;
211                }
212              } else {
213                if (backEdgeBlock == null) {
214                  backEdgeBlock = o;
215                } else {
216                  report("4 multiple back edges.\n");
217                  return true;
218                }
219              }
220            }
221          }
222    
223          // look at the out edges to find loop exits
224          e = b.getOut();
225          while (e.hasMoreElements()) {
226            BasicBlock out = e.nextElement();
227            if (!CFGTransformations.inLoop(out, nloop)) {
228              if (exitBlock == null) {
229                exitBlock = b;
230              } else {
231                report("5 multiple exit blocks.\n");
232                return true;
233              }
234            }
235          }
236        }
237    
238        // exitBlock must equal backEdgeBlock
239        if (exitBlock == null) {
240          report("6 no exit block found...infinite loop?");
241          return true;
242        }
243        if (exitBlock != backEdgeBlock) {
244          report("7 exit block is not immediate predecessor of loop head");
245          return true;
246        }
247    
248        // exitBlock must exit (skip over pads in critical edges)
249        while (exitBlock.getNumberOfOut() == 1 && exitBlock.getNumberOfIn() == 1) {
250          exitBlock = exitBlock.getIn().nextElement();
251        }
252    
253        if (exitBlock == header && blocks > 1) {
254          report("6 while loop? (" + blocks + ")\n");
255          return true;
256        }
257    
258        // So far, so good. Examine the exit test.
259        Instruction origBranch = exitBlock.firstBranchInstruction();
260        if (origBranch != exitBlock.lastRealInstruction()) {
261          Instruction aGoto = origBranch.nextInstructionInCodeOrder();
262          if (aGoto.operator.opcode != GOTO_opcode) {
263            report("7 too complex exit\n");
264            return true;
265          }
266          succBlock = Label.getBlock(Goto.getTarget(aGoto).target).block;
267          if (VM.VerifyAssertions) {
268            VM._assert(aGoto == exitBlock.lastRealInstruction());
269          }
270        } else {
271          succBlock = exitBlock.getFallThroughBlock();
272        }
273    
274        if (origBranch.operator.opcode != INT_IFCMP_opcode) {
275          report("8 branch isn't int_ifcmp: " + origBranch.operator + ".\n");
276          return true;
277        }
278    
279        // examine operands:
280        Operand op1 = follow(IfCmp.getVal1(origBranch));
281        Operand op2 = follow(IfCmp.getVal2(origBranch));
282        ConditionOperand cond = (ConditionOperand) IfCmp.getCond(origBranch).copy();
283        RegisterOperand ifcmpGuard = IfCmp.getGuardResult(origBranch);
284        float backBranchProbability = IfCmp.getBranchProfile(origBranch).takenProbability;
285        if (!loopInvariant(op2, nloop, 4)) {
286          if (loopInvariant(op1, nloop, 4)) {
287            Operand op = op1;
288            op1 = op2;
289            op2 = op;
290            cond.flipOperands();
291          } else {
292            if (DEBUG) {
293              printDefs(op1, nloop, 4);
294              printDefs(op2, nloop, 4);
295              VM.sysWrite("" + origBranch + "\n");
296            }
297            report("8a op1 and op2 may not be loop invariant\n");
298            return true;
299          }
300        }
301        BasicBlock target = Label.getBlock(IfCmp.getTarget(origBranch).target).block;
302    
303        if (!(op1 instanceof RegisterOperand)) {
304          report("9 op1 of ifcmp isn't a register\n");
305          return true;
306        }
307    
308        RegisterOperand rop1 = (RegisterOperand) op1;
309    
310        Register reg = rop1.getRegister();
311        if (reg.isPhysical()) {
312          report("10 loops over physical register\n");
313          return false;
314        }
315        if (succBlock == header && !CFGTransformations.inLoop(target, nloop)) {
316          succBlock = target;
317          target = header;
318          cond.flipCode();
319        }
320        if (target != header) {
321          report("11 ifcmp doesn't jump to header\n");
322          return true;
323        }
324    
325        Instruction iterator = null;
326        Enumeration<Operand> defs = new RealDefs(rop1);
327        while (defs.hasMoreElements()) {
328          Operand def = defs.nextElement();
329          Instruction inst = def.instruction;
330          BasicBlock block = inst.getBasicBlock();
331          //VM.sysWrite (""+block+": "+inst+"\n");
332          if (CFGTransformations.inLoop(block, nloop)) {
333            if (iterator == null) {
334              iterator = inst;
335            } else {
336              report("12 iterator not unique.\n");
337              return true;
338            }
339          }
340        }
341    
342        if (iterator == null) {
343          report("15 iterator not found.\n");
344          return true;
345        }
346    
347        if (iterator.operator.opcode != INT_ADD_opcode) {
348          //dumpIR (ir, "malformed");
349          report("16 iterator is no addition: " + iterator.operator + "\n");
350          return true;
351        }
352    
353        if (!rop1.similar(follow(Binary.getVal1(iterator)))) {
354          //dumpIR (ir, "malformed");
355          report("17 malformed iterator.\n" + iterator + "\n");
356          return true;
357        }
358    
359        Operand strideOp = follow(Binary.getVal2(iterator));
360        if (!(strideOp instanceof IntConstantOperand)) {
361          report("18 stride not constant\n");
362          return true;
363        }
364    
365        int stride = ((IntConstantOperand) strideOp).value;
366        if (stride != 1 && stride != -1) {
367          report("18b stride != +/-1 (" + stride + ")\n");
368          return true;
369        }
370    
371        if ((stride == 1 &&
372             ((cond.value != ConditionOperand.LESS) &&
373              cond.value != ConditionOperand.LESS_EQUAL &&
374              cond.value != ConditionOperand.NOT_EQUAL)) ||
375                                                             (stride == -1 &&
376                                                              ((cond.value != ConditionOperand.GREATER) &&
377                                                               cond.value != ConditionOperand.GREATER_EQUAL &&
378                                                               cond.value != ConditionOperand.NOT_EQUAL))) {
379          report("19 unexpected condition: " + cond + "\n" + iterator + "\n" + origBranch + "\n\n");
380          return true;
381        }
382    
383        RegisterOperand outerGuard;
384        BasicBlock outer = predBlock;
385        while (outer.getNumberOfOut() == 1 && outer.getNumberOfIn() == 1) {
386          outer = outer.getIn().nextElement();
387        }
388        if (outer.getNumberOfIn() > 0 && outer.getNumberOfOut() < 2) {
389          report("23 no suitable outer guard found.\n");
390          return true;
391        }
392    
393        tmp = outer.firstBranchInstruction();
394        if (tmp != null && GuardResultCarrier.conforms(tmp)) {
395          outerGuard = GuardResultCarrier.getGuardResult(tmp);
396        } else {
397          outerGuard = ir.regpool.makeTempValidation();
398        }
399    
400        ////////////
401        // transfom
402    
403        // transform this:
404        //
405        // Orig:
406        //  B
407        //  if i CC b goto Orig
408        //  else goto exit
409        //
410        // exit:
411        //
412        // into this:
413        //
414    //
415    //  stride == 1:           common:                      stride == -1:
416    //--------------------------------------------------------------------------
417    //                          guard0:
418    //                           limit = b;
419    //   if a > b goto Orig                                  if b > a goto Orig
420    //                           else guard1
421    //
422    //
423    //                          guard 1:
424    //   remainder = b - a;                                  remainder = a - b;
425    // if cond == '<='                                    if cond == '>='
426    //   remainder++;                                         remainder++;
427    //                           remainder = remainder & 3
428    //   limit = a + remainder                               limit = a - remainder
429    // if cond == '<='                                    if cond == '>='
430    //   limit--;                                            limit++;
431    //                           if remainder == 0 goto mllp
432    //                           goto Orig
433    //
434    //                          Orig:
435    //                           LOOP;
436    //                           if i CC limit goto Orig
437    //                           else guard2
438    //
439    //                          guard2: if i CC b goto mllp
440    //                           else exit
441    //
442    //                           mllp: // landing pad
443    //                           goto ml
444    //
445    //                          ml:
446    //                           LOOP;LOOP;LOOP;LOOP;
447    //                           if i CC b goto ml
448    //                           else exit
449    //
450    //                          exit:
451    //--------------------------------------------------------------------------
452        report("...transforming.\n");
453        if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
454          dumpIR(ir, "before unroll");
455        }
456    
457        CFGTransformations.killFallThroughs(ir, nloop);
458        BasicBlock[] handles = makeSomeCopies(unrollFactor, ir, nloop, blocks, header, exitBlock, exitBlock);
459        BasicBlock mainHeader = handles[0];
460        BasicBlock mainExit = handles[1];
461    
462        // test block for well formed bounds
463        BasicBlock guardBlock0 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
464        predBlock.redirectOuts(header, guardBlock0, ir);
465    
466        // test block for iteration alignemnt
467        BasicBlock guardBlock1 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
468    
469        // landing pad for orig loop
470        BasicBlock olp = header.createSubBlock(header.firstInstruction().bcIndex, ir);
471        olp.setLandingPad();
472    
473        BasicBlock predSucc = predBlock.nextBasicBlockInCodeOrder();
474        if (predSucc != null) {
475          ir.cfg.breakCodeOrder(predBlock, predSucc);
476          ir.cfg.linkInCodeOrder(olp, predSucc);
477        }
478        ir.cfg.linkInCodeOrder(predBlock, guardBlock0);
479        ir.cfg.linkInCodeOrder(guardBlock0, guardBlock1);
480        ir.cfg.linkInCodeOrder(guardBlock1, olp);
481    
482        // guard block for main loop
483        BasicBlock guardBlock2 = header.createSubBlock(header.firstInstruction().bcIndex, ir);
484    
485        // landing pad for main loop
486        BasicBlock landingPad = header.createSubBlock(header.firstInstruction().bcIndex, ir);
487        landingPad.setLandingPad();
488    
489        BasicBlock mainLoop = exitBlock.nextBasicBlockInCodeOrder();
490        ir.cfg.breakCodeOrder(exitBlock, mainLoop);
491        ir.cfg.linkInCodeOrder(exitBlock, guardBlock2);
492        ir.cfg.linkInCodeOrder(guardBlock2, landingPad);
493        ir.cfg.linkInCodeOrder(landingPad, mainLoop);
494    
495        RegisterOperand remainder = ir.regpool.makeTemp(rop1.getType());
496        RegisterOperand limit = ir.regpool.makeTemp(rop1.getType());
497    
498        // test whether a <= b for stride == 1 and a >= b for stride == -1
499        tmp = guardBlock0.lastInstruction();
500        tmp.insertBefore(Move.create(INT_MOVE, limit, op2.copy()));
501    
502        ConditionOperand g0cond = ConditionOperand.GREATER_EQUAL();
503        if (stride == -1) g0cond = ConditionOperand.LESS_EQUAL();
504    
505        tmp.insertBefore(IfCmp.create(INT_IFCMP,
506                                      outerGuard.copyD2D(),
507                                      rop1.copyD2U(),
508                                      op2.copy(),
509                                      g0cond,
510                                      olp.makeJumpTarget(),
511                                      BranchProfileOperand.unlikely()));
512        tmp.insertBefore(Goto.create(GOTO, guardBlock1.makeJumpTarget()));
513    
514        // align the loop iterations
515        tmp = guardBlock1.lastInstruction();
516        if (stride == 1) {
517          tmp.insertBefore(Binary.create(INT_SUB, remainder, op2.copy(), rop1.copyD2U()));
518        } else {
519          tmp.insertBefore(Binary.create(INT_SUB, remainder, rop1.copyD2U(), op2.copy()));
520        }
521    
522        if (cond.isGREATER_EQUAL() || cond.isLESS_EQUAL()) {
523          tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
524        }
525    
526        tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(-1)));
527    
528        tmp.insertBefore(Binary.create(INT_AND,
529                                       remainder.copyD2D(),
530                                       remainder.copyD2U(),
531                                       new IntConstantOperand(unrollFactor - 1)));
532    
533        tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1)));
534    
535        if (stride == 1) {
536          tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
537        } else {
538          tmp.insertBefore(Binary.create(INT_SUB, limit.copyD2U(), op1.copy(), remainder.copyD2U()));
539        }
540    
541        if (cond.isLESS_EQUAL()) {
542          tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(-1)));
543        }
544    
545        if (cond.isGREATER_EQUAL()) {
546          tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(1)));
547        }
548    
549        tmp.insertBefore(Goto.create(GOTO, olp.makeJumpTarget()));
550    
551        // build landing pad for original loop
552        tmp = olp.lastInstruction();
553        tmp.insertBefore(Goto.create(GOTO, header.makeJumpTarget()));
554    
555        // change the back branch in the original loop
556        deleteBranches(exitBlock);
557        tmp = exitBlock.lastInstruction();
558        tmp.insertBefore(IfCmp.create(INT_IFCMP,
559                                      outerGuard.copyD2D(),
560                                      rop1.copyU2U(),
561                                      limit.copyD2U(),
562                                      (ConditionOperand) cond.copy(),
563                                      header.makeJumpTarget(),
564                                      new BranchProfileOperand(1.0f - 1.0f / (unrollFactor / 2))));
565        tmp.insertBefore(Goto.create(GOTO, guardBlock2.makeJumpTarget()));
566    
567        // only enter main loop if iterations left
568        tmp = guardBlock2.lastInstruction();
569        tmp.insertBefore(IfCmp.create(INT_IFCMP,
570                                      outerGuard.copyD2D(),
571                                      rop1.copyU2U(),
572                                      op2.copy(),
573                                      (ConditionOperand) cond.copy(),
574                                      landingPad.makeJumpTarget(),
575                                      new BranchProfileOperand(backBranchProbability)));
576        tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
577    
578        // landing pad jumps to mainHeader
579        tmp = landingPad.lastInstruction();
580        tmp.insertBefore(Goto.create(GOTO, mainHeader.makeJumpTarget()));
581    
582        // repair back edge in mainExit
583        if (VM.VerifyAssertions) VM._assert(mainExit != null);
584        tmp = mainExit.lastInstruction();
585        if (VM.VerifyAssertions) {
586          VM._assert((mainExit.lastRealInstruction() == null) || !mainExit.lastRealInstruction().isBranch());
587        }
588        tmp.insertBefore(IfCmp.create(INT_IFCMP,
589                                      ifcmpGuard.copyU2U(),
590                                      rop1.copyU2U(),
591                                      op2.copy(),
592                                      (ConditionOperand) cond.copy(),
593                                      mainHeader.makeJumpTarget(),
594                                      new BranchProfileOperand(1.0f - (1.0f - backBranchProbability) * unrollFactor)));
595        tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget()));
596    
597        // recompute normal outs
598        guardBlock0.recomputeNormalOut(ir);
599        guardBlock1.recomputeNormalOut(ir);
600        olp.recomputeNormalOut(ir);
601        guardBlock2.recomputeNormalOut(ir);
602        exitBlock.recomputeNormalOut(ir);
603        landingPad.recomputeNormalOut(ir);
604        mainExit.recomputeNormalOut(ir);
605        if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) {
606          dumpIR(ir, "after unroll");
607        }
608        return false;
609      }
610    
611      private void naiveUnroller(LSTNode t, IR ir) {
612        BitVector nloop = t.loop;
613        BasicBlock seqStart = null;
614        Enumeration<BasicBlock> bs;
615    
616        if (t.loop.populationCount() > MAX_BLOCKS_FOR_NAIVE_UNROLLING) {
617          report("1 is too big\n");
618          return;
619        }
620        report("Naively unrolling\n");
621    
622        CFGTransformations.killFallThroughs(ir, nloop);
623    
624        // first, capture the blocks in the loop body.
625        int bodyBlocks = nloop.populationCount();
626        BasicBlock[] body = new BasicBlock[bodyBlocks];
627        {
628          int i = 0;
629          bs = ir.getBasicBlocks(nloop);
630          while (bs.hasMoreElements()) {
631            BasicBlock b = bs.nextElement();
632            if (VM.VerifyAssertions) {
633              VM._assert(!(b instanceof ExceptionHandlerBasicBlock));
634            }
635            body[i++] = b;
636            BasicBlock next = b.nextBasicBlockInCodeOrder();
637            if (next == null || !CFGTransformations.inLoop(next, nloop)) {
638              seqStart = b; // end of loop in code order
639            }
640          }
641        }
642    
643        BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
644        if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
645        BasicBlock seqLast = seqStart;
646        BasicBlock firstHeaderCopy = null;
647        BasicBlock currentBlock = seqLast;
648    
649        for (int i = 1; i <= unrollFactor; ++i) {
650    
651          // copy body
652          for (BasicBlock bb : body) {
653            seqLast = copyAndLinkBlock(ir, seqLast, bb);
654            if (bb == t.header) {
655              if (firstHeaderCopy == null) {
656                firstHeaderCopy = seqLast;
657              }
658            }
659          }
660    
661          // redirect internal branches
662          currentBlock = seqLast;
663          for (int j = 0; j < bodyBlocks; ++j) {
664            currentBlock.recomputeNormalOut(ir);
665            Enumeration<BasicBlock> be = currentBlock.getOut();
666            while (be.hasMoreElements()) {
667              BasicBlock out = be.nextElement();
668              if (out != t.header && CFGTransformations.inLoop(out, nloop)) {
669                BasicBlock outCopy = (BasicBlock) out.scratchObject;
670                currentBlock.redirectOuts(out, outCopy, ir);
671              }
672            }
673            currentBlock.recomputeNormalOut(ir);
674            currentBlock = currentBlock.prevBasicBlockInCodeOrder();
675          }
676    
677          if (i != 1) {
678            // redirect the branches to the header in the (i-1)th copy
679            for (int j = 0; j < bodyBlocks; ++j) {
680              Enumeration<BasicBlock> be = currentBlock.getOut();
681              while (be.hasMoreElements()) {
682                BasicBlock out = be.nextElement();
683                if (out == t.header) {
684                  BasicBlock headerCopy;
685                  headerCopy = (BasicBlock) t.header.scratchObject;
686                  currentBlock.redirectOuts(t.header, headerCopy, ir);
687                }
688              }
689              currentBlock.recomputeNormalOut(ir);
690              currentBlock = currentBlock.prevBasicBlockInCodeOrder();
691            }
692          }
693        }
694        if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
695    
696        // in the original loop, redirect branches that go to the header
697        // and make them point to the first header copy
698    
699        for (int j = 0; j < bodyBlocks; ++j) {
700          Enumeration<BasicBlock> be = body[j].getOut();
701          while (be.hasMoreElements()) {
702            BasicBlock out = be.nextElement();
703            if (out == t.header) {
704              body[j].redirectOuts(t.header, firstHeaderCopy, ir);
705            }
706          }
707          body[j].recomputeNormalOut(ir);
708        }
709    
710        // the following loop redirects backedges that start in the last
711        // copy to point to the first copy instead and not to the original
712        // header.
713        //                |                    |
714        // Thus we get   [ ]     instead of   [ ]<-.
715        //                |                    |   |
716        //               [ ]<-.               [ ]  |
717        //                |   |                |   |
718        //               [ ]  |               [ ]  |
719        //                |   |                |   |
720        //               [ ]  |               [ ]  |
721        //                |\_/                 |\_/
722        //
723        // Instead of 2^(unroll_log) we only have 2^(unroll_log-1) bodies
724        // in the unrolled loop, but there is one copy of the loop's body
725        // that dominates the unrolled version. Peeling of this first
726        // version should have benefits for global code placement.
727        currentBlock = seqLast;
728        for (int j = 0; j < bodyBlocks; ++j) {
729          Enumeration<BasicBlock> be = currentBlock.getOut();
730          while (be.hasMoreElements()) {
731            BasicBlock out = be.nextElement();
732            if (out == t.header) {
733              currentBlock.redirectOuts(t.header, firstHeaderCopy, ir);
734            }
735          }
736          currentBlock.recomputeNormalOut(ir);
737          currentBlock = currentBlock.prevBasicBlockInCodeOrder();
738        }
739      }
740    
741      static void report(String s) {
742        if (DEBUG) VM.sysWrite("] " + s);
743      }
744    
745      private static int theVisit = 1;
746    
747      private static Operand follow(Operand use) {
748        theVisit++;
749        return _follow(use);
750      }
751    
752      private static Operand _follow(Operand use) {
753        while (true) {
754          if (!(use instanceof RegisterOperand)) return use;
755          RegisterOperand rop = (RegisterOperand) use;
756          Enumeration<RegisterOperand> defs = DefUse.defs(rop.getRegister());
757          if (!defs.hasMoreElements()) {return use;}
758          Instruction def = defs.nextElement().instruction;
759          if (!Move.conforms(def)) return use;
760          if (defs.hasMoreElements()) {return use;}
761    
762          if (def.scratch == theVisit) return use;
763          def.scratch = theVisit;
764    
765          use = Move.getVal(def);
766        }
767      }
768    
769      private static Instruction definingInstruction(Operand op) {
770        if (!(op instanceof RegisterOperand)) return op.instruction;
771        Enumeration<RegisterOperand> defs = DefUse.defs(((RegisterOperand) op).getRegister());
772        if (!defs.hasMoreElements()) {return op.instruction;}
773        Instruction def = defs.nextElement().instruction;
774        if (defs.hasMoreElements()) {return op.instruction;}
775        return def;
776      }
777    
778      private static boolean loopInvariant(Operand op, BitVector nloop, int depth) {
779        if (depth <= 0) {
780          return false;
781        } else if (op instanceof ConstantOperand) {
782          return true;
783        } else if (op instanceof RegisterOperand) {
784          Register reg = ((RegisterOperand) op).getRegister();
785          Enumeration<RegisterOperand> defs = DefUse.defs(reg);
786          // if no definitions of this register (very strange) give up
787          if (!defs.hasMoreElements()) return false;
788          Instruction inst = defs.nextElement().instruction;
789          // if multiple definitions of a register give up as follow may
790          // fail to give the correct invariant
791          return !defs.hasMoreElements() && !CFGTransformations.inLoop(inst.getBasicBlock(), nloop);
792        } else {
793          return false;
794        }
795      }
796    
797      private static boolean printDefs(Operand op, BitVector nloop, int depth) {
798        if (depth <= 0) return false;
799        if (op instanceof ConstantOperand) {
800          VM.sysWrite(">> " + op + "\n");
801          return true;
802        }
803        if (op instanceof RegisterOperand) {
804          boolean invariant = true;
805          Register reg = ((RegisterOperand) op).getRegister();
806          Enumeration<RegisterOperand> defs = DefUse.defs(reg);
807          while (defs.hasMoreElements()) {
808            Instruction inst = defs.nextElement().instruction;
809            VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
810            if (CFGTransformations.inLoop(inst.getBasicBlock(), nloop)) {
811              if (Move.conforms(inst)) {
812                invariant &= printDefs(Move.getVal(inst), nloop, depth - 1);
813              } else if (inst.operator.opcode == ARRAYLENGTH_opcode) {
814                invariant &= printDefs(GuardedUnary.getVal(inst), nloop, depth);
815              } else {
816                invariant = false;
817              }
818            }
819            if (!invariant) break;
820          }
821          return invariant;
822        }
823        return false;
824      }
825    
826      @SuppressWarnings("unused")
827      // For debugging
828      private static void _printDefs(Operand op) {
829        if (op instanceof RegisterOperand) {
830          Register reg = ((RegisterOperand) op).getRegister();
831          Enumeration<RegisterOperand> defs = DefUse.defs(reg);
832          defs = DefUse.defs(reg);
833          while (defs.hasMoreElements()) {
834            Instruction inst = defs.nextElement().instruction;
835            if (Move.conforms(inst)) {
836              inst = definingInstruction(follow(Move.getVal(inst)));
837            }
838            VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n");
839          }
840        } else {
841          VM.sysWrite(">> " + op + "\n");
842        }
843      }
844    
845      static void linkToLST(IR ir) {
846        Enumeration<BasicBlock> e = ir.getBasicBlocks();
847        while (e.hasMoreElements()) {
848          e.nextElement().scratchObject = null;
849          e.nextElement().scratch = 0;
850        }
851        LSTGraph lstg = ir.HIRInfo.loopStructureTree;
852        if (lstg != null) markHeaders((LSTNode) lstg.firstNode());
853      }
854    
855      // for all loops:
856      // make the header block point to the corresponding loop structure tree node.
857      private static void markHeaders(LSTNode t) {
858        BasicBlock header = t.header;
859        header.scratchObject = t;
860        Enumeration<GraphNode> e = t.outNodes();
861        while (e.hasMoreElements()) {
862          LSTNode n = (LSTNode) e.nextElement();
863          markHeaders(n);
864        }
865      }
866    
867      // inserts unrollFactor copies of the loop after seqStart
868      static BasicBlock[] makeSomeCopies(int unrollFactor, IR ir, BitVector nloop, int blocks,
869                                             BasicBlock header, BasicBlock exitBlock, BasicBlock seqStart) {
870        // make some copies of the original loop
871    
872        // first, capture the blocks in the loop body.
873        BitVector loop = new BitVector(nloop);
874        loop.clear(header.getNumber());
875        loop.clear(exitBlock.getNumber());
876        int bodyBlocks = 0;
877        Enumeration<BasicBlock> bs = ir.getBasicBlocks(loop);
878        while (bs.hasMoreElements()) {
879          bodyBlocks++;
880          bs.nextElement();
881        }
882        BasicBlock[] body = new BasicBlock[bodyBlocks];
883        {
884          int i = 0;
885          bs = ir.getBasicBlocks(loop);
886          while (bs.hasMoreElements()) {
887            body[i++] = bs.nextElement();
888          }
889        }
890    
891        BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder();
892        if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd);
893        BasicBlock seqLast = seqStart;
894    
895        BasicBlock firstHeader = null;
896        BasicBlock lastHeader = null;
897        BasicBlock lastExit = null;
898        BasicBlock[] handles = new BasicBlock[2];
899    
900        for (int i = 0; i < unrollFactor; ++i) {
901    
902          // copy header
903          seqLast = copyAndLinkBlock(ir, seqLast, header);
904          lastHeader = seqLast;
905    
906          if (i == 0) {
907            firstHeader = seqLast;
908          } else {
909            // link copies by jumps
910            lastExit.appendInstruction(Goto.create(GOTO, seqLast.makeJumpTarget()));
911            lastExit.recomputeNormalOut(ir);
912          }
913    
914          // copy body
915          for (BasicBlock bb : body) {
916            seqLast = copyAndLinkBlock(ir, seqLast, bb);
917          }
918    
919          // copy exit block
920          if (exitBlock != header) {
921            seqLast = copyAndLinkBlock(ir, seqLast, exitBlock);
922            lastExit = seqLast;
923          } else {
924            lastExit = lastHeader;
925          }
926    
927          // delete all branches in the copies of the exit block
928          deleteBranches(lastExit);
929    
930          // redirect internal branches
931          BasicBlock cb = seqLast;
932          for (int j = 0; j < blocks; ++j) {
933            cb.recomputeNormalOut(ir);
934            Enumeration<BasicBlock> be = cb.getOut();
935            while (be.hasMoreElements()) {
936              BasicBlock out = be.nextElement();
937              if (CFGTransformations.inLoop(out, nloop)) {
938                cb.redirectOuts(out, (BasicBlock) out.scratchObject, ir);
939              }
940            }
941            cb.recomputeNormalOut(ir);
942            cb = cb.prevBasicBlockInCodeOrder();
943          }
944        }
945        if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd);
946        handles[0] = firstHeader;
947        handles[1] = lastExit;
948        return handles;
949      }
950    
951      static BasicBlock copyAndLinkBlock(IR ir, BasicBlock seqLast, BasicBlock block) {
952        BasicBlock copy = block.copyWithoutLinks(ir);
953        ir.cfg.linkInCodeOrder(seqLast, copy);
954        block.scratchObject = copy;
955        return copy;
956      }
957    
958      static void deleteBranches(BasicBlock b) {
959        Instruction branch = b.lastRealInstruction();
960        while (branch.isBranch()) {
961          Instruction nextBranch = branch.prevInstructionInCodeOrder();
962          branch.remove();
963          branch = nextBranch;
964        }
965      }
966    
967      static final class RealDefs implements Enumeration<Operand> {
968        private Enumeration<RegisterOperand> defs = null;
969        private Operand use;
970        private RealDefs others = null;
971    
972        private void init(Operand use) {
973          this.use = use;
974          if (use instanceof RegisterOperand) {
975            RegisterOperand rop = (RegisterOperand) use;
976            defs = DefUse.defs(rop.getRegister());
977            this.use = null;
978            if (!defs.hasMoreElements()) defs = null;
979          }
980        }
981    
982        public RealDefs(Operand use) {
983          this.init(use);
984          theVisit++;
985        }
986    
987        public RealDefs(Operand use, int visit) {
988          this.init(use);
989          theVisit = visit;
990        }
991    
992        @Override
993        public boolean hasMoreElements() {
994          return use != null || (others != null && others.hasMoreElements()) || (defs != null && defs.hasMoreElements());
995        }
996    
997        @Override
998        public Operand nextElement() {
999          Operand res = use;
1000          if (res != null) {
1001            use = null;
1002            return res;
1003          }
1004          if (others != null && others.hasMoreElements()) {
1005            return others.nextElement();
1006          }
1007    
1008          res = defs.nextElement();
1009          Instruction inst = res.instruction;
1010          if (!(Move.conforms(inst)) || inst.scratch == theVisit) {
1011            return res;
1012          }
1013          inst.scratch = theVisit;
1014    
1015          others = new RealDefs(Move.getVal(inst), theVisit);
1016          if (!(others.hasMoreElements())) return res;
1017          return others.nextElement();
1018        }
1019    
1020        public Operand nextClear() {
1021          Operand res = nextElement();
1022          res.instruction = null;
1023          return res;
1024        }
1025      }
1026    }