001 /* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013 package org.jikesrvm.compilers.opt.controlflow; 014 015 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode; 016 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO; 017 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO_opcode; 018 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD; 019 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode; 020 import static org.jikesrvm.compilers.opt.ir.Operators.INT_AND; 021 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP; 022 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP_opcode; 023 import static org.jikesrvm.compilers.opt.ir.Operators.INT_MOVE; 024 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB; 025 026 import java.lang.reflect.Constructor; 027 import java.util.Enumeration; 028 029 import org.jikesrvm.VM; 030 import org.jikesrvm.compilers.opt.DefUse; 031 import org.jikesrvm.compilers.opt.OptOptions; 032 import org.jikesrvm.compilers.opt.Simple; 033 import org.jikesrvm.compilers.opt.driver.CompilerPhase; 034 import org.jikesrvm.compilers.opt.ir.BasicBlock; 035 import org.jikesrvm.compilers.opt.ir.Binary; 036 import org.jikesrvm.compilers.opt.ir.ExceptionHandlerBasicBlock; 037 import org.jikesrvm.compilers.opt.ir.Goto; 038 import org.jikesrvm.compilers.opt.ir.GuardResultCarrier; 039 import org.jikesrvm.compilers.opt.ir.GuardedUnary; 040 import org.jikesrvm.compilers.opt.ir.IR; 041 import org.jikesrvm.compilers.opt.ir.IfCmp; 042 import org.jikesrvm.compilers.opt.ir.Instruction; 043 import org.jikesrvm.compilers.opt.ir.Label; 044 import org.jikesrvm.compilers.opt.ir.Move; 045 import org.jikesrvm.compilers.opt.ir.Register; 046 import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand; 047 import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand; 048 import org.jikesrvm.compilers.opt.ir.operand.ConstantOperand; 049 import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand; 050 import org.jikesrvm.compilers.opt.ir.operand.Operand; 051 import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand; 052 import org.jikesrvm.compilers.opt.util.GraphNode; 053 import org.jikesrvm.util.BitVector; 054 055 public class LoopUnrolling extends CompilerPhase { 056 057 static final boolean DEBUG = false; 058 static final int MAX_BLOCKS_FOR_NAIVE_UNROLLING = 20; 059 060 /** 061 * Returns the name of the phase. 062 */ 063 @Override 064 public String getName() { 065 return "Loop Unrolling"; 066 } 067 068 /** 069 * Constructor for this compiler phase 070 */ 071 private static final Constructor<CompilerPhase> constructor = 072 getCompilerPhaseConstructor(LoopUnrolling.class); 073 074 /** 075 * Get a constructor object for this compiler phase 076 * @return compiler phase constructor 077 */ 078 @Override 079 public Constructor<CompilerPhase> getClassConstructor() { 080 return constructor; 081 } 082 083 /** 084 * This phase is disabled by default. 085 * <p> 086 * It will run only on O3 but O2 is the default maximum optimization level. 087 */ 088 @Override 089 public boolean shouldPerform(OptOptions options) { 090 return ((options.getOptLevel() >= 3) && (options.CONTROL_UNROLL_LOG >= 1) && (!options.SSA_LOOP_VERSIONING)); 091 } 092 093 @Override 094 public void perform(IR ir) { 095 unrollFactor = (1 << ir.options.CONTROL_UNROLL_LOG); 096 097 if (ir.hasReachableExceptionHandlers()) return; 098 DefUse.computeDU(ir); 099 new Simple(1, true, true, true, false).perform(ir); 100 new BranchOptimizations(-1, true, true).perform(ir, true); 101 102 //new CFGTransformations().perform(ir); 103 // Note: the following unfactors the CFG 104 new DominatorsPhase(true).perform(ir); 105 DefUse.computeDU(ir); 106 107 ir.setInstructionScratchWord(0); 108 109 unrollLoops(ir); 110 111 CFGTransformations.splitCriticalEdges(ir); 112 ir.cfg.compactNodeNumbering(); 113 } 114 115 /** 116 * unroll the loops in the given IR. 117 */ 118 void unrollLoops(IR ir) { 119 LSTGraph lstg = ir.HIRInfo.loopStructureTree; 120 121 for (int i = 1; lstg != null && i <= 1; ++i) { 122 unrollLoopTree((LSTNode) lstg.firstNode(), ir, i); 123 (new BuildLST()).perform(ir); 124 } 125 } 126 127 /** 128 * loop unrolling on a given loop structure sub tree 129 * @param t 130 * @param ir 131 */ 132 int unrollLoopTree(LSTNode t, IR ir, int target) { 133 int height = 1; 134 Enumeration<GraphNode> e = t.outNodes(); 135 if (!e.hasMoreElements()) { 136 if (t.loop != null) { 137 report("Leaf loop in " + ir.method + ": " + t.loop + "\n"); 138 // check infrequency 139 if (t.header.getInfrequent()) { 140 report("no unrolling of infrequent loop\n"); 141 } else { 142 boolean doNaiveUnrolling = height == target && unrollLeaf(t, ir); 143 if (doNaiveUnrolling) naiveUnroller(t, ir); 144 } 145 } 146 } else { 147 while (e.hasMoreElements()) { 148 LSTNode n = (LSTNode) e.nextElement(); 149 int heightOfTree = unrollLoopTree(n, ir, target); 150 height = Math.max(height, heightOfTree) + 1; 151 } 152 } 153 return height; 154 } 155 156 static final int MaxInstructions = 100; 157 private int unrollFactor = 1; 158 159 boolean unrollLeaf(LSTNode t, IR ir) { 160 int instructionsInLoop = 0; 161 BasicBlock exitBlock = null, backEdgeBlock = null, succBlock = null, predBlock = null; 162 BitVector nloop = t.loop; 163 BasicBlock header = t.header; 164 Instruction tmp; 165 166 if (ir.hasReachableExceptionHandlers()) { 167 report("0 IR may have exception handlers\n"); 168 return false; 169 } 170 171 // determine loop structure by looking at its blocks 172 Enumeration<BasicBlock> loopBlocks = ir.getBasicBlocks(nloop); 173 int blocks = 0; 174 while (loopBlocks.hasMoreElements()) { 175 BasicBlock b = loopBlocks.nextElement(); 176 blocks++; 177 178 // check for size 179 instructionsInLoop += b.getNumberOfRealInstructions(); 180 if (instructionsInLoop > MaxInstructions) { 181 report("1 is too big\n"); 182 return false; 183 } 184 185 // look at the in edges. We want the header to be the only 186 // block with out of loop incoming edges. 187 Enumeration<BasicBlock> e = b.getIn(); 188 if (b != header) { 189 while (e.hasMoreElements()) { 190 BasicBlock o = e.nextElement(); 191 if (!CFGTransformations.inLoop(o, nloop)) { 192 report("2 interior pointers.\n"); 193 return true; 194 } 195 } 196 } else { 197 // check the headers predecessors: there should be 198 // one out of loop input and one backedge. 199 // We can extend this for loops with several backedges, 200 // if they all have the same conditions. 201 int inEdges = 0; 202 while (e.hasMoreElements()) { 203 inEdges++; 204 BasicBlock o = e.nextElement(); 205 if (!CFGTransformations.inLoop(o, nloop)) { 206 if (predBlock == null) { 207 predBlock = o; 208 } else { 209 report("3 multi entry header.\n"); 210 return true; 211 } 212 } else { 213 if (backEdgeBlock == null) { 214 backEdgeBlock = o; 215 } else { 216 report("4 multiple back edges.\n"); 217 return true; 218 } 219 } 220 } 221 } 222 223 // look at the out edges to find loop exits 224 e = b.getOut(); 225 while (e.hasMoreElements()) { 226 BasicBlock out = e.nextElement(); 227 if (!CFGTransformations.inLoop(out, nloop)) { 228 if (exitBlock == null) { 229 exitBlock = b; 230 } else { 231 report("5 multiple exit blocks.\n"); 232 return true; 233 } 234 } 235 } 236 } 237 238 // exitBlock must equal backEdgeBlock 239 if (exitBlock == null) { 240 report("6 no exit block found...infinite loop?"); 241 return true; 242 } 243 if (exitBlock != backEdgeBlock) { 244 report("7 exit block is not immediate predecessor of loop head"); 245 return true; 246 } 247 248 // exitBlock must exit (skip over pads in critical edges) 249 while (exitBlock.getNumberOfOut() == 1 && exitBlock.getNumberOfIn() == 1) { 250 exitBlock = exitBlock.getIn().nextElement(); 251 } 252 253 if (exitBlock == header && blocks > 1) { 254 report("6 while loop? (" + blocks + ")\n"); 255 return true; 256 } 257 258 // So far, so good. Examine the exit test. 259 Instruction origBranch = exitBlock.firstBranchInstruction(); 260 if (origBranch != exitBlock.lastRealInstruction()) { 261 Instruction aGoto = origBranch.nextInstructionInCodeOrder(); 262 if (aGoto.operator.opcode != GOTO_opcode) { 263 report("7 too complex exit\n"); 264 return true; 265 } 266 succBlock = Label.getBlock(Goto.getTarget(aGoto).target).block; 267 if (VM.VerifyAssertions) { 268 VM._assert(aGoto == exitBlock.lastRealInstruction()); 269 } 270 } else { 271 succBlock = exitBlock.getFallThroughBlock(); 272 } 273 274 if (origBranch.operator.opcode != INT_IFCMP_opcode) { 275 report("8 branch isn't int_ifcmp: " + origBranch.operator + ".\n"); 276 return true; 277 } 278 279 // examine operands: 280 Operand op1 = follow(IfCmp.getVal1(origBranch)); 281 Operand op2 = follow(IfCmp.getVal2(origBranch)); 282 ConditionOperand cond = (ConditionOperand) IfCmp.getCond(origBranch).copy(); 283 RegisterOperand ifcmpGuard = IfCmp.getGuardResult(origBranch); 284 float backBranchProbability = IfCmp.getBranchProfile(origBranch).takenProbability; 285 if (!loopInvariant(op2, nloop, 4)) { 286 if (loopInvariant(op1, nloop, 4)) { 287 Operand op = op1; 288 op1 = op2; 289 op2 = op; 290 cond.flipOperands(); 291 } else { 292 if (DEBUG) { 293 printDefs(op1, nloop, 4); 294 printDefs(op2, nloop, 4); 295 VM.sysWrite("" + origBranch + "\n"); 296 } 297 report("8a op1 and op2 may not be loop invariant\n"); 298 return true; 299 } 300 } 301 BasicBlock target = Label.getBlock(IfCmp.getTarget(origBranch).target).block; 302 303 if (!(op1 instanceof RegisterOperand)) { 304 report("9 op1 of ifcmp isn't a register\n"); 305 return true; 306 } 307 308 RegisterOperand rop1 = (RegisterOperand) op1; 309 310 Register reg = rop1.getRegister(); 311 if (reg.isPhysical()) { 312 report("10 loops over physical register\n"); 313 return false; 314 } 315 if (succBlock == header && !CFGTransformations.inLoop(target, nloop)) { 316 succBlock = target; 317 target = header; 318 cond.flipCode(); 319 } 320 if (target != header) { 321 report("11 ifcmp doesn't jump to header\n"); 322 return true; 323 } 324 325 Instruction iterator = null; 326 Enumeration<Operand> defs = new RealDefs(rop1); 327 while (defs.hasMoreElements()) { 328 Operand def = defs.nextElement(); 329 Instruction inst = def.instruction; 330 BasicBlock block = inst.getBasicBlock(); 331 //VM.sysWrite (""+block+": "+inst+"\n"); 332 if (CFGTransformations.inLoop(block, nloop)) { 333 if (iterator == null) { 334 iterator = inst; 335 } else { 336 report("12 iterator not unique.\n"); 337 return true; 338 } 339 } 340 } 341 342 if (iterator == null) { 343 report("15 iterator not found.\n"); 344 return true; 345 } 346 347 if (iterator.operator.opcode != INT_ADD_opcode) { 348 //dumpIR (ir, "malformed"); 349 report("16 iterator is no addition: " + iterator.operator + "\n"); 350 return true; 351 } 352 353 if (!rop1.similar(follow(Binary.getVal1(iterator)))) { 354 //dumpIR (ir, "malformed"); 355 report("17 malformed iterator.\n" + iterator + "\n"); 356 return true; 357 } 358 359 Operand strideOp = follow(Binary.getVal2(iterator)); 360 if (!(strideOp instanceof IntConstantOperand)) { 361 report("18 stride not constant\n"); 362 return true; 363 } 364 365 int stride = ((IntConstantOperand) strideOp).value; 366 if (stride != 1 && stride != -1) { 367 report("18b stride != +/-1 (" + stride + ")\n"); 368 return true; 369 } 370 371 if ((stride == 1 && 372 ((cond.value != ConditionOperand.LESS) && 373 cond.value != ConditionOperand.LESS_EQUAL && 374 cond.value != ConditionOperand.NOT_EQUAL)) || 375 (stride == -1 && 376 ((cond.value != ConditionOperand.GREATER) && 377 cond.value != ConditionOperand.GREATER_EQUAL && 378 cond.value != ConditionOperand.NOT_EQUAL))) { 379 report("19 unexpected condition: " + cond + "\n" + iterator + "\n" + origBranch + "\n\n"); 380 return true; 381 } 382 383 RegisterOperand outerGuard; 384 BasicBlock outer = predBlock; 385 while (outer.getNumberOfOut() == 1 && outer.getNumberOfIn() == 1) { 386 outer = outer.getIn().nextElement(); 387 } 388 if (outer.getNumberOfIn() > 0 && outer.getNumberOfOut() < 2) { 389 report("23 no suitable outer guard found.\n"); 390 return true; 391 } 392 393 tmp = outer.firstBranchInstruction(); 394 if (tmp != null && GuardResultCarrier.conforms(tmp)) { 395 outerGuard = GuardResultCarrier.getGuardResult(tmp); 396 } else { 397 outerGuard = ir.regpool.makeTempValidation(); 398 } 399 400 //////////// 401 // transfom 402 403 // transform this: 404 // 405 // Orig: 406 // B 407 // if i CC b goto Orig 408 // else goto exit 409 // 410 // exit: 411 // 412 // into this: 413 // 414 // 415 // stride == 1: common: stride == -1: 416 //-------------------------------------------------------------------------- 417 // guard0: 418 // limit = b; 419 // if a > b goto Orig if b > a goto Orig 420 // else guard1 421 // 422 // 423 // guard 1: 424 // remainder = b - a; remainder = a - b; 425 // if cond == '<=' if cond == '>=' 426 // remainder++; remainder++; 427 // remainder = remainder & 3 428 // limit = a + remainder limit = a - remainder 429 // if cond == '<=' if cond == '>=' 430 // limit--; limit++; 431 // if remainder == 0 goto mllp 432 // goto Orig 433 // 434 // Orig: 435 // LOOP; 436 // if i CC limit goto Orig 437 // else guard2 438 // 439 // guard2: if i CC b goto mllp 440 // else exit 441 // 442 // mllp: // landing pad 443 // goto ml 444 // 445 // ml: 446 // LOOP;LOOP;LOOP;LOOP; 447 // if i CC b goto ml 448 // else exit 449 // 450 // exit: 451 //-------------------------------------------------------------------------- 452 report("...transforming.\n"); 453 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) { 454 dumpIR(ir, "before unroll"); 455 } 456 457 CFGTransformations.killFallThroughs(ir, nloop); 458 BasicBlock[] handles = makeSomeCopies(unrollFactor, ir, nloop, blocks, header, exitBlock, exitBlock); 459 BasicBlock mainHeader = handles[0]; 460 BasicBlock mainExit = handles[1]; 461 462 // test block for well formed bounds 463 BasicBlock guardBlock0 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 464 predBlock.redirectOuts(header, guardBlock0, ir); 465 466 // test block for iteration alignemnt 467 BasicBlock guardBlock1 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 468 469 // landing pad for orig loop 470 BasicBlock olp = header.createSubBlock(header.firstInstruction().bcIndex, ir); 471 olp.setLandingPad(); 472 473 BasicBlock predSucc = predBlock.nextBasicBlockInCodeOrder(); 474 if (predSucc != null) { 475 ir.cfg.breakCodeOrder(predBlock, predSucc); 476 ir.cfg.linkInCodeOrder(olp, predSucc); 477 } 478 ir.cfg.linkInCodeOrder(predBlock, guardBlock0); 479 ir.cfg.linkInCodeOrder(guardBlock0, guardBlock1); 480 ir.cfg.linkInCodeOrder(guardBlock1, olp); 481 482 // guard block for main loop 483 BasicBlock guardBlock2 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 484 485 // landing pad for main loop 486 BasicBlock landingPad = header.createSubBlock(header.firstInstruction().bcIndex, ir); 487 landingPad.setLandingPad(); 488 489 BasicBlock mainLoop = exitBlock.nextBasicBlockInCodeOrder(); 490 ir.cfg.breakCodeOrder(exitBlock, mainLoop); 491 ir.cfg.linkInCodeOrder(exitBlock, guardBlock2); 492 ir.cfg.linkInCodeOrder(guardBlock2, landingPad); 493 ir.cfg.linkInCodeOrder(landingPad, mainLoop); 494 495 RegisterOperand remainder = ir.regpool.makeTemp(rop1.getType()); 496 RegisterOperand limit = ir.regpool.makeTemp(rop1.getType()); 497 498 // test whether a <= b for stride == 1 and a >= b for stride == -1 499 tmp = guardBlock0.lastInstruction(); 500 tmp.insertBefore(Move.create(INT_MOVE, limit, op2.copy())); 501 502 ConditionOperand g0cond = ConditionOperand.GREATER_EQUAL(); 503 if (stride == -1) g0cond = ConditionOperand.LESS_EQUAL(); 504 505 tmp.insertBefore(IfCmp.create(INT_IFCMP, 506 outerGuard.copyD2D(), 507 rop1.copyD2U(), 508 op2.copy(), 509 g0cond, 510 olp.makeJumpTarget(), 511 BranchProfileOperand.unlikely())); 512 tmp.insertBefore(Goto.create(GOTO, guardBlock1.makeJumpTarget())); 513 514 // align the loop iterations 515 tmp = guardBlock1.lastInstruction(); 516 if (stride == 1) { 517 tmp.insertBefore(Binary.create(INT_SUB, remainder, op2.copy(), rop1.copyD2U())); 518 } else { 519 tmp.insertBefore(Binary.create(INT_SUB, remainder, rop1.copyD2U(), op2.copy())); 520 } 521 522 if (cond.isGREATER_EQUAL() || cond.isLESS_EQUAL()) { 523 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1))); 524 } 525 526 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(-1))); 527 528 tmp.insertBefore(Binary.create(INT_AND, 529 remainder.copyD2D(), 530 remainder.copyD2U(), 531 new IntConstantOperand(unrollFactor - 1))); 532 533 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1))); 534 535 if (stride == 1) { 536 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2U(), op1.copy(), remainder.copyD2U())); 537 } else { 538 tmp.insertBefore(Binary.create(INT_SUB, limit.copyD2U(), op1.copy(), remainder.copyD2U())); 539 } 540 541 if (cond.isLESS_EQUAL()) { 542 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(-1))); 543 } 544 545 if (cond.isGREATER_EQUAL()) { 546 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(1))); 547 } 548 549 tmp.insertBefore(Goto.create(GOTO, olp.makeJumpTarget())); 550 551 // build landing pad for original loop 552 tmp = olp.lastInstruction(); 553 tmp.insertBefore(Goto.create(GOTO, header.makeJumpTarget())); 554 555 // change the back branch in the original loop 556 deleteBranches(exitBlock); 557 tmp = exitBlock.lastInstruction(); 558 tmp.insertBefore(IfCmp.create(INT_IFCMP, 559 outerGuard.copyD2D(), 560 rop1.copyU2U(), 561 limit.copyD2U(), 562 (ConditionOperand) cond.copy(), 563 header.makeJumpTarget(), 564 new BranchProfileOperand(1.0f - 1.0f / (unrollFactor / 2)))); 565 tmp.insertBefore(Goto.create(GOTO, guardBlock2.makeJumpTarget())); 566 567 // only enter main loop if iterations left 568 tmp = guardBlock2.lastInstruction(); 569 tmp.insertBefore(IfCmp.create(INT_IFCMP, 570 outerGuard.copyD2D(), 571 rop1.copyU2U(), 572 op2.copy(), 573 (ConditionOperand) cond.copy(), 574 landingPad.makeJumpTarget(), 575 new BranchProfileOperand(backBranchProbability))); 576 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget())); 577 578 // landing pad jumps to mainHeader 579 tmp = landingPad.lastInstruction(); 580 tmp.insertBefore(Goto.create(GOTO, mainHeader.makeJumpTarget())); 581 582 // repair back edge in mainExit 583 if (VM.VerifyAssertions) VM._assert(mainExit != null); 584 tmp = mainExit.lastInstruction(); 585 if (VM.VerifyAssertions) { 586 VM._assert((mainExit.lastRealInstruction() == null) || !mainExit.lastRealInstruction().isBranch()); 587 } 588 tmp.insertBefore(IfCmp.create(INT_IFCMP, 589 ifcmpGuard.copyU2U(), 590 rop1.copyU2U(), 591 op2.copy(), 592 (ConditionOperand) cond.copy(), 593 mainHeader.makeJumpTarget(), 594 new BranchProfileOperand(1.0f - (1.0f - backBranchProbability) * unrollFactor))); 595 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget())); 596 597 // recompute normal outs 598 guardBlock0.recomputeNormalOut(ir); 599 guardBlock1.recomputeNormalOut(ir); 600 olp.recomputeNormalOut(ir); 601 guardBlock2.recomputeNormalOut(ir); 602 exitBlock.recomputeNormalOut(ir); 603 landingPad.recomputeNormalOut(ir); 604 mainExit.recomputeNormalOut(ir); 605 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) { 606 dumpIR(ir, "after unroll"); 607 } 608 return false; 609 } 610 611 private void naiveUnroller(LSTNode t, IR ir) { 612 BitVector nloop = t.loop; 613 BasicBlock seqStart = null; 614 Enumeration<BasicBlock> bs; 615 616 if (t.loop.populationCount() > MAX_BLOCKS_FOR_NAIVE_UNROLLING) { 617 report("1 is too big\n"); 618 return; 619 } 620 report("Naively unrolling\n"); 621 622 CFGTransformations.killFallThroughs(ir, nloop); 623 624 // first, capture the blocks in the loop body. 625 int bodyBlocks = nloop.populationCount(); 626 BasicBlock[] body = new BasicBlock[bodyBlocks]; 627 { 628 int i = 0; 629 bs = ir.getBasicBlocks(nloop); 630 while (bs.hasMoreElements()) { 631 BasicBlock b = bs.nextElement(); 632 if (VM.VerifyAssertions) { 633 VM._assert(!(b instanceof ExceptionHandlerBasicBlock)); 634 } 635 body[i++] = b; 636 BasicBlock next = b.nextBasicBlockInCodeOrder(); 637 if (next == null || !CFGTransformations.inLoop(next, nloop)) { 638 seqStart = b; // end of loop in code order 639 } 640 } 641 } 642 643 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder(); 644 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd); 645 BasicBlock seqLast = seqStart; 646 BasicBlock firstHeaderCopy = null; 647 BasicBlock currentBlock = seqLast; 648 649 for (int i = 1; i <= unrollFactor; ++i) { 650 651 // copy body 652 for (BasicBlock bb : body) { 653 seqLast = copyAndLinkBlock(ir, seqLast, bb); 654 if (bb == t.header) { 655 if (firstHeaderCopy == null) { 656 firstHeaderCopy = seqLast; 657 } 658 } 659 } 660 661 // redirect internal branches 662 currentBlock = seqLast; 663 for (int j = 0; j < bodyBlocks; ++j) { 664 currentBlock.recomputeNormalOut(ir); 665 Enumeration<BasicBlock> be = currentBlock.getOut(); 666 while (be.hasMoreElements()) { 667 BasicBlock out = be.nextElement(); 668 if (out != t.header && CFGTransformations.inLoop(out, nloop)) { 669 BasicBlock outCopy = (BasicBlock) out.scratchObject; 670 currentBlock.redirectOuts(out, outCopy, ir); 671 } 672 } 673 currentBlock.recomputeNormalOut(ir); 674 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 675 } 676 677 if (i != 1) { 678 // redirect the branches to the header in the (i-1)th copy 679 for (int j = 0; j < bodyBlocks; ++j) { 680 Enumeration<BasicBlock> be = currentBlock.getOut(); 681 while (be.hasMoreElements()) { 682 BasicBlock out = be.nextElement(); 683 if (out == t.header) { 684 BasicBlock headerCopy; 685 headerCopy = (BasicBlock) t.header.scratchObject; 686 currentBlock.redirectOuts(t.header, headerCopy, ir); 687 } 688 } 689 currentBlock.recomputeNormalOut(ir); 690 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 691 } 692 } 693 } 694 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd); 695 696 // in the original loop, redirect branches that go to the header 697 // and make them point to the first header copy 698 699 for (int j = 0; j < bodyBlocks; ++j) { 700 Enumeration<BasicBlock> be = body[j].getOut(); 701 while (be.hasMoreElements()) { 702 BasicBlock out = be.nextElement(); 703 if (out == t.header) { 704 body[j].redirectOuts(t.header, firstHeaderCopy, ir); 705 } 706 } 707 body[j].recomputeNormalOut(ir); 708 } 709 710 // the following loop redirects backedges that start in the last 711 // copy to point to the first copy instead and not to the original 712 // header. 713 // | | 714 // Thus we get [ ] instead of [ ]<-. 715 // | | | 716 // [ ]<-. [ ] | 717 // | | | | 718 // [ ] | [ ] | 719 // | | | | 720 // [ ] | [ ] | 721 // |\_/ |\_/ 722 // 723 // Instead of 2^(unroll_log) we only have 2^(unroll_log-1) bodies 724 // in the unrolled loop, but there is one copy of the loop's body 725 // that dominates the unrolled version. Peeling of this first 726 // version should have benefits for global code placement. 727 currentBlock = seqLast; 728 for (int j = 0; j < bodyBlocks; ++j) { 729 Enumeration<BasicBlock> be = currentBlock.getOut(); 730 while (be.hasMoreElements()) { 731 BasicBlock out = be.nextElement(); 732 if (out == t.header) { 733 currentBlock.redirectOuts(t.header, firstHeaderCopy, ir); 734 } 735 } 736 currentBlock.recomputeNormalOut(ir); 737 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 738 } 739 } 740 741 static void report(String s) { 742 if (DEBUG) VM.sysWrite("] " + s); 743 } 744 745 private static int theVisit = 1; 746 747 private static Operand follow(Operand use) { 748 theVisit++; 749 return _follow(use); 750 } 751 752 private static Operand _follow(Operand use) { 753 while (true) { 754 if (!(use instanceof RegisterOperand)) return use; 755 RegisterOperand rop = (RegisterOperand) use; 756 Enumeration<RegisterOperand> defs = DefUse.defs(rop.getRegister()); 757 if (!defs.hasMoreElements()) {return use;} 758 Instruction def = defs.nextElement().instruction; 759 if (!Move.conforms(def)) return use; 760 if (defs.hasMoreElements()) {return use;} 761 762 if (def.scratch == theVisit) return use; 763 def.scratch = theVisit; 764 765 use = Move.getVal(def); 766 } 767 } 768 769 private static Instruction definingInstruction(Operand op) { 770 if (!(op instanceof RegisterOperand)) return op.instruction; 771 Enumeration<RegisterOperand> defs = DefUse.defs(((RegisterOperand) op).getRegister()); 772 if (!defs.hasMoreElements()) {return op.instruction;} 773 Instruction def = defs.nextElement().instruction; 774 if (defs.hasMoreElements()) {return op.instruction;} 775 return def; 776 } 777 778 private static boolean loopInvariant(Operand op, BitVector nloop, int depth) { 779 if (depth <= 0) { 780 return false; 781 } else if (op instanceof ConstantOperand) { 782 return true; 783 } else if (op instanceof RegisterOperand) { 784 Register reg = ((RegisterOperand) op).getRegister(); 785 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 786 // if no definitions of this register (very strange) give up 787 if (!defs.hasMoreElements()) return false; 788 Instruction inst = defs.nextElement().instruction; 789 // if multiple definitions of a register give up as follow may 790 // fail to give the correct invariant 791 return !defs.hasMoreElements() && !CFGTransformations.inLoop(inst.getBasicBlock(), nloop); 792 } else { 793 return false; 794 } 795 } 796 797 private static boolean printDefs(Operand op, BitVector nloop, int depth) { 798 if (depth <= 0) return false; 799 if (op instanceof ConstantOperand) { 800 VM.sysWrite(">> " + op + "\n"); 801 return true; 802 } 803 if (op instanceof RegisterOperand) { 804 boolean invariant = true; 805 Register reg = ((RegisterOperand) op).getRegister(); 806 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 807 while (defs.hasMoreElements()) { 808 Instruction inst = defs.nextElement().instruction; 809 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n"); 810 if (CFGTransformations.inLoop(inst.getBasicBlock(), nloop)) { 811 if (Move.conforms(inst)) { 812 invariant &= printDefs(Move.getVal(inst), nloop, depth - 1); 813 } else if (inst.operator.opcode == ARRAYLENGTH_opcode) { 814 invariant &= printDefs(GuardedUnary.getVal(inst), nloop, depth); 815 } else { 816 invariant = false; 817 } 818 } 819 if (!invariant) break; 820 } 821 return invariant; 822 } 823 return false; 824 } 825 826 @SuppressWarnings("unused") 827 // For debugging 828 private static void _printDefs(Operand op) { 829 if (op instanceof RegisterOperand) { 830 Register reg = ((RegisterOperand) op).getRegister(); 831 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 832 defs = DefUse.defs(reg); 833 while (defs.hasMoreElements()) { 834 Instruction inst = defs.nextElement().instruction; 835 if (Move.conforms(inst)) { 836 inst = definingInstruction(follow(Move.getVal(inst))); 837 } 838 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n"); 839 } 840 } else { 841 VM.sysWrite(">> " + op + "\n"); 842 } 843 } 844 845 static void linkToLST(IR ir) { 846 Enumeration<BasicBlock> e = ir.getBasicBlocks(); 847 while (e.hasMoreElements()) { 848 e.nextElement().scratchObject = null; 849 e.nextElement().scratch = 0; 850 } 851 LSTGraph lstg = ir.HIRInfo.loopStructureTree; 852 if (lstg != null) markHeaders((LSTNode) lstg.firstNode()); 853 } 854 855 // for all loops: 856 // make the header block point to the corresponding loop structure tree node. 857 private static void markHeaders(LSTNode t) { 858 BasicBlock header = t.header; 859 header.scratchObject = t; 860 Enumeration<GraphNode> e = t.outNodes(); 861 while (e.hasMoreElements()) { 862 LSTNode n = (LSTNode) e.nextElement(); 863 markHeaders(n); 864 } 865 } 866 867 // inserts unrollFactor copies of the loop after seqStart 868 static BasicBlock[] makeSomeCopies(int unrollFactor, IR ir, BitVector nloop, int blocks, 869 BasicBlock header, BasicBlock exitBlock, BasicBlock seqStart) { 870 // make some copies of the original loop 871 872 // first, capture the blocks in the loop body. 873 BitVector loop = new BitVector(nloop); 874 loop.clear(header.getNumber()); 875 loop.clear(exitBlock.getNumber()); 876 int bodyBlocks = 0; 877 Enumeration<BasicBlock> bs = ir.getBasicBlocks(loop); 878 while (bs.hasMoreElements()) { 879 bodyBlocks++; 880 bs.nextElement(); 881 } 882 BasicBlock[] body = new BasicBlock[bodyBlocks]; 883 { 884 int i = 0; 885 bs = ir.getBasicBlocks(loop); 886 while (bs.hasMoreElements()) { 887 body[i++] = bs.nextElement(); 888 } 889 } 890 891 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder(); 892 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd); 893 BasicBlock seqLast = seqStart; 894 895 BasicBlock firstHeader = null; 896 BasicBlock lastHeader = null; 897 BasicBlock lastExit = null; 898 BasicBlock[] handles = new BasicBlock[2]; 899 900 for (int i = 0; i < unrollFactor; ++i) { 901 902 // copy header 903 seqLast = copyAndLinkBlock(ir, seqLast, header); 904 lastHeader = seqLast; 905 906 if (i == 0) { 907 firstHeader = seqLast; 908 } else { 909 // link copies by jumps 910 lastExit.appendInstruction(Goto.create(GOTO, seqLast.makeJumpTarget())); 911 lastExit.recomputeNormalOut(ir); 912 } 913 914 // copy body 915 for (BasicBlock bb : body) { 916 seqLast = copyAndLinkBlock(ir, seqLast, bb); 917 } 918 919 // copy exit block 920 if (exitBlock != header) { 921 seqLast = copyAndLinkBlock(ir, seqLast, exitBlock); 922 lastExit = seqLast; 923 } else { 924 lastExit = lastHeader; 925 } 926 927 // delete all branches in the copies of the exit block 928 deleteBranches(lastExit); 929 930 // redirect internal branches 931 BasicBlock cb = seqLast; 932 for (int j = 0; j < blocks; ++j) { 933 cb.recomputeNormalOut(ir); 934 Enumeration<BasicBlock> be = cb.getOut(); 935 while (be.hasMoreElements()) { 936 BasicBlock out = be.nextElement(); 937 if (CFGTransformations.inLoop(out, nloop)) { 938 cb.redirectOuts(out, (BasicBlock) out.scratchObject, ir); 939 } 940 } 941 cb.recomputeNormalOut(ir); 942 cb = cb.prevBasicBlockInCodeOrder(); 943 } 944 } 945 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd); 946 handles[0] = firstHeader; 947 handles[1] = lastExit; 948 return handles; 949 } 950 951 static BasicBlock copyAndLinkBlock(IR ir, BasicBlock seqLast, BasicBlock block) { 952 BasicBlock copy = block.copyWithoutLinks(ir); 953 ir.cfg.linkInCodeOrder(seqLast, copy); 954 block.scratchObject = copy; 955 return copy; 956 } 957 958 static void deleteBranches(BasicBlock b) { 959 Instruction branch = b.lastRealInstruction(); 960 while (branch.isBranch()) { 961 Instruction nextBranch = branch.prevInstructionInCodeOrder(); 962 branch.remove(); 963 branch = nextBranch; 964 } 965 } 966 967 static final class RealDefs implements Enumeration<Operand> { 968 private Enumeration<RegisterOperand> defs = null; 969 private Operand use; 970 private RealDefs others = null; 971 972 private void init(Operand use) { 973 this.use = use; 974 if (use instanceof RegisterOperand) { 975 RegisterOperand rop = (RegisterOperand) use; 976 defs = DefUse.defs(rop.getRegister()); 977 this.use = null; 978 if (!defs.hasMoreElements()) defs = null; 979 } 980 } 981 982 public RealDefs(Operand use) { 983 this.init(use); 984 theVisit++; 985 } 986 987 public RealDefs(Operand use, int visit) { 988 this.init(use); 989 theVisit = visit; 990 } 991 992 @Override 993 public boolean hasMoreElements() { 994 return use != null || (others != null && others.hasMoreElements()) || (defs != null && defs.hasMoreElements()); 995 } 996 997 @Override 998 public Operand nextElement() { 999 Operand res = use; 1000 if (res != null) { 1001 use = null; 1002 return res; 1003 } 1004 if (others != null && others.hasMoreElements()) { 1005 return others.nextElement(); 1006 } 1007 1008 res = defs.nextElement(); 1009 Instruction inst = res.instruction; 1010 if (!(Move.conforms(inst)) || inst.scratch == theVisit) { 1011 return res; 1012 } 1013 inst.scratch = theVisit; 1014 1015 others = new RealDefs(Move.getVal(inst), theVisit); 1016 if (!(others.hasMoreElements())) return res; 1017 return others.nextElement(); 1018 } 1019 1020 public Operand nextClear() { 1021 Operand res = nextElement(); 1022 res.instruction = null; 1023 return res; 1024 } 1025 } 1026 }