001 /* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013 package org.jikesrvm.compilers.opt.ssa; 014 015 import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI; 016 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH; 017 import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode; 018 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO; 019 import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE; 020 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD; 021 import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode; 022 import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP; 023 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB; 024 import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode; 025 import static org.jikesrvm.compilers.opt.ir.Operators.PHI; 026 import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP; 027 028 import java.lang.reflect.Constructor; 029 import java.util.ArrayList; 030 import java.util.Enumeration; 031 import java.util.HashMap; 032 import java.util.HashSet; 033 import java.util.Iterator; 034 035 import org.jikesrvm.VM; 036 import org.jikesrvm.classloader.TypeReference; 037 import org.jikesrvm.compilers.opt.DefUse; 038 import org.jikesrvm.compilers.opt.OptOptions; 039 import org.jikesrvm.compilers.opt.OptimizingCompilerException; 040 import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph; 041 import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode; 042 import org.jikesrvm.compilers.opt.controlflow.DominatorTree; 043 import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase; 044 import org.jikesrvm.compilers.opt.controlflow.LSTGraph; 045 import org.jikesrvm.compilers.opt.controlflow.LTDominators; 046 import org.jikesrvm.compilers.opt.driver.CompilerPhase; 047 import org.jikesrvm.compilers.opt.ir.BBend; 048 import org.jikesrvm.compilers.opt.ir.BasicBlock; 049 import org.jikesrvm.compilers.opt.ir.Binary; 050 import org.jikesrvm.compilers.opt.ir.BoundsCheck; 051 import org.jikesrvm.compilers.opt.ir.Goto; 052 import org.jikesrvm.compilers.opt.ir.GuardedUnary; 053 import org.jikesrvm.compilers.opt.ir.IR; 054 import org.jikesrvm.compilers.opt.ir.IREnumeration; 055 import org.jikesrvm.compilers.opt.ir.IfCmp; 056 import org.jikesrvm.compilers.opt.ir.Instruction; 057 import org.jikesrvm.compilers.opt.ir.Label; 058 import org.jikesrvm.compilers.opt.ir.Move; 059 import org.jikesrvm.compilers.opt.ir.NullCheck; 060 import org.jikesrvm.compilers.opt.ir.Phi; 061 import org.jikesrvm.compilers.opt.ir.Register; 062 import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand; 063 import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand; 064 import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand; 065 import org.jikesrvm.compilers.opt.ir.operand.HeapOperand; 066 import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand; 067 import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand; 068 import org.jikesrvm.compilers.opt.ir.operand.Operand; 069 import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand; 070 import org.jikesrvm.compilers.opt.util.GraphNode; 071 072 /** 073 * This optimisation works from the outer most loop inward, optimising 074 * loops that conform to being regular {@link 075 * AnnotatedLSTNode}s. The optimisations performs the following 076 * operations: 077 * <ul> 078 * <li> 079 * 1) Determine the bound and null checks to be eliminated. These are 080 * the ones that operate on the loop iterator. If no bound checks can 081 * be eliminated, stop optimising this loop. 082 * <li> 083 * 2) Determine the registers defined in the loop. 084 * <li> 085 * 3) Generate phi nodes that define the original register defined by 086 * the loop and use two newly created registers. 087 * <li> 088 * 4) Create a version of the original loop that uses the first of the 089 * newly created registers instead of the original registers. 090 * <li> 091 * 5) Create a second version, this time with the result of the 092 * eliminated checks set to true. 093 * <li> 094 * 6) Work out what the maximum value for all the bounds checks are 095 * and create branches to optimal or suboptimal loops 096 * <li> 097 * 7) Fix up the phi node predecessors 098 * <li> 099 * 8) Remove the unoptimized loop if its redundant 100 * <li> 101 * 9) Replace register definitions in the original loop with phi 102 * instructions 103 * <li> 104 * 10) Compact node numbering so that CFG number of nodes reflects 105 * that some basic blocks may have been deleted 106 * </ul> 107 * <p> 108 * Example: 109 * <listing> 110 * for (int t1=0; t1 < 100; t1++) { 111 * g1 = null_check l0 112 * g2 = bounds_check l0, t1 113 * g3 = guard_combine g1,g2 114 * t2 = aload l0, t1, g3 115 * g4 = null_check l1 116 * g5 = bounds_check l1, t1 117 * g6 = guard_combine g4,g5 118 * astore t2, l1, t1, g6 119 * } 120 * </listing> 121 * 122 * becomes: 123 * 124 * <listing> 125 * goto explicit_test_block 126 * successor_to_loops: 127 * g1 = phi g1_1, g1_2 128 * g2 = phi g2_1, g2_2 129 * g3 = phi g3_1, g3_2 130 * t2 = phi t2_1, t2_2 131 * g4 = phi g4_1, g4_2 132 * g5 = phi g5_1, g5_2 133 * g6 = phi g6_1, g6_2 134 * goto after_loop 135 * explicit_test_block: 136 * if l0 == null (unlikely) goto sub_optimal_loop 137 * if 100 >= l0.length (unlikely) goto sub_optimal_loop 138 * if l1 == null (unlikely) goto sub_optimal_loop 139 * if 100 >= l1.length (unlikely) goto sub_optimal_loop 140 * goto optimal_loop 141 * sub_optimal_loop: 142 * for (int t1_1=0; t1_1 < 100; t1_1++) { 143 * g1_1 = null_check l0 144 * g2_1 = bounds_check l0, t1_1 145 * g3_1 = guard_combine g1_1,g2_1 146 * t2_1 = aload l0, t1_1, g3_1 147 * g4_1 = null_check l1 148 * g5_1 = bounds_check l1, t1_1 149 * g6_1 = guard_combine g4_1,g5_1 150 * astore t2_1, l1, t1_1, g6_1 151 * } 152 * goto successor_to_loops 153 * optimal_loop: 154 * for (int t1_2=0; t1_2 < 100; t1_2++) { 155 * g1_2 = true_guard 156 * g2_2 = true_guard 157 * g3_2 = guard_combine g1_2,g2_2 158 * t2_2 = aload l0, t1_2, g3_2 159 * g4_2 = null_check l1 160 * g5_2 = bounds_check l1, t1_2 161 * g6_2 = guard_combine g4_2,g5_2 162 * astore t2_2, l1, t1_2, g6_2 163 * } 164 * goto successor_to_loops 165 * after_loop: 166 * </listing> 167 * 168 * The optimisation works on the Heap SSA form. A more accurate 169 * example of the transformation would be: 170 * 171 * <listing> 172 * heap1 = ...; // previous heap state 173 * t1_1 = 0; 174 * if t1_1 ≥ 100 goto label2 175 * label1: 176 * t1_2 = phi t1_1, t1_3 177 * heap2 = phi heap1, heap3 178 * g1 = null_check l0 179 * g2 = bounds_check l0, t1_2 180 * g3 = guard_combine g1,g2 181 * t2 = aload l0, t1_2, g3 182 * g4 = null_check l1 183 * g5 = bounds_check l1, t1_2 184 * g6 = guard_combine g4,g5 185 * heap3 = astore t2, l1, t1_2, g6 186 * t1_3 = t1_2 + 1 187 * if t1_3 < 100 label1 * label2: 188 * </listing> 189 * 190 * becomes: 191 * 192 * <listing> 193 * heap1 = ...; // previous heap state 194 * t1_1 = 0; 195 * if t1_1 ≥ 100 goto label2 196 * goto explicit_test_block 197 * successor_to_loops: 198 * t1_2 = phi t1_2_1, t1_2_2 199 * heap2 = phi heap2_1, heap2_2 200 * g1 = phi g1_1, g1_2 201 * g2 = phi g2_1, g2_2 202 * g3 = phi g3_1, g3_2 203 * t2 = phi t2_1, t2_2 204 * g4 = phi g4_1, g4_2 205 * g5 = phi g5_1, g5_2 206 * g6 = phi g6_1, g6_2 207 * heap3 = phi heap3_1, heap3_2 208 * t1_3 = phi t1_3_1, t1_3_2 209 * goto after_loop 210 * explicit_test_block: 211 * g1_2 = if l0 == null (unlikely) goto sub_optimal_loop 212 * g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop 213 * g4_2 = if l1 == null (unlikely) goto sub_optimal_loop 214 * g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop 215 * goto optimal_loop 216 * sub_optimal_loop: 217 * label1_1: 218 * t1_2_1 = phi t1_1, t1_3_1 219 * heap2_1 = phi heap1, heap3_1 220 * g1_1 = null_check l0 221 * g2_1 = bounds_check l0, t1_2_1 222 * g3_1 = guard_combine g1_1,g2_1 223 * t2_1 = aload l0, t1_2_1, g3_1 224 * g4_1 = null_check l1 225 * g5_1 = bounds_check l1, t1_2_1 226 * g6_1 = guard_combine g4_1,g5_1 227 * heap3_1 = astore t2_1, l1, t1_2_1, g6_1 228 * t1_3_1 = t1_2_1 + 1 229 * if t1_3_1 < 100 label1_1 230 * goto successor_to_loops 231 * optimal_loop: 232 * label1_2: 233 * t1_2_2 = phi t1_1, t1_3_2 234 * heap2_2 = phi heap1, heap3_2 235 * g3_2 = guard_combine g1_2,g2_2 236 * t2_2 = aload l0, t1_2_2, g3_2 237 * g6_2 = guard_combine g4_2,g5_2 238 * heap3_2 = astore t2_2, l1, t1_2_2, g6_2 239 * t1_3_2 = t1_2_2 + 1 240 * if t1_3_2 < 100 label1_2 241 * goto successor_to_loops 242 * after_loop: 243 * label2: 244 * </listing> 245 */ 246 public final class LoopVersioning extends CompilerPhase { 247 // -oO Debug variables Oo- 248 /** 249 * Flag to optionally print verbose debugging messages 250 */ 251 private static final boolean DEBUG = false; 252 /** 253 * Flag to verify computed IR 254 */ 255 private static final boolean VERIFY = false; 256 257 // -oO Debug routines Oo- 258 /** 259 * Human readable report of what goes on 260 * 261 * @param s String to print 262 **/ 263 private static void report(String s) { 264 if (DEBUG) { 265 VM.sysWriteln(s); 266 } 267 } 268 269 /** 270 * Return a string name for this phase. 271 * @return "Loop Versioning" 272 */ 273 @Override 274 public String getName() { 275 return "Loop Versioning"; 276 } 277 278 // -oO Variables used throughout the optimisation phase Oo- 279 /** 280 * The phi instruction operand holding the optimized loop variable 281 */ 282 private static final int OPTIMIZED_LOOP_OPERAND = 0; 283 /** 284 * The phi instruction operand holding the unoptimized loop variable 285 */ 286 private static final int UNOPTIMIZED_LOOP_OPERAND = 1; 287 288 /** 289 * IR for optimisation 290 */ 291 private IR ir; 292 293 /** 294 * Set used to store the loop related register 295 */ 296 private HashSet<Register> loopRegisterSet; 297 298 /** 299 * SSA options 300 */ 301 private SSAOptions desiredSSAOptions; 302 /** 303 * Compiler phases called from this one 304 */ 305 private CompilerPhase enterSSA, leaveSSA, domPhase; 306 /** 307 * Run inside SSA sub-phase 308 */ 309 private static final boolean inSSAphase = true; 310 311 // -oO Interface to the rest of the compiler Oo- 312 313 /** 314 * Constructor for this compiler phase 315 */ 316 private static final Constructor<CompilerPhase> constructor = 317 getCompilerPhaseConstructor(LoopVersioning.class); 318 319 /** 320 * Get a constructor object for this compiler phase 321 * @return compiler phase constructor 322 */ 323 @Override 324 public Constructor<CompilerPhase> getClassConstructor() { 325 return constructor; 326 } 327 328 /** 329 * Constructor 330 */ 331 public LoopVersioning() { 332 desiredSSAOptions = new SSAOptions(); 333 desiredSSAOptions.setScalarsOnly(true); 334 if (!inSSAphase) { 335 enterSSA = new EnterSSA(); 336 leaveSSA = new LeaveSSA(); 337 } 338 domPhase = new DominatorsPhase(false); 339 } 340 341 /** 342 * Should loop versioning be performed? 343 */ 344 @Override 345 public boolean shouldPerform(OptOptions options) { 346 return options.SSA_LOOP_VERSIONING; 347 } 348 349 /** 350 * @param _ir the IR to process 351 */ 352 @Override 353 public void perform(IR _ir) { 354 ir = _ir; 355 356 // Create SSA 357 ir.desiredSSAOptions = desiredSSAOptions; 358 if (!inSSAphase) { 359 enterSSA.perform(ir); 360 } 361 362 if (DEBUG) { 363 SSA.printInstructions(ir); 364 } 365 366 // Perform loop annotation 367 if (!ir.hasReachableExceptionHandlers()) { 368 // Build LST tree and dominator info 369 domPhase.perform(ir); 370 DefUse.computeDU(ir); 371 // Build annotated version 372 ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree); 373 } 374 if (VERIFY) { 375 ir.verify(getName(), true); 376 } 377 378 // Check loop annotation has been performed 379 if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) { 380 report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n"); 381 } else { 382 loopRegisterSet = new HashSet<Register>(); 383 384 if (DEBUG) { 385 VM.sysWriteln(ir.getMethod().toString()); 386 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString()); 387 SSA.printInstructions(ir); 388 } 389 390 while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) { 391 if (DEBUG) { 392 VM.sysWriteln("Successful optimisation of " + ir.getMethod()); 393 SSA.printInstructions(ir); 394 VM.sysWriteln(ir.cfg.toString()); 395 } 396 // Get IR into shape for next pass 397 DefUse.computeDU(ir); 398 LTDominators.perform(ir, true, true); 399 ir.HIRInfo.dominatorTree = new DominatorTree(ir, true); 400 LSTGraph.perform(ir); 401 AnnotatedLSTGraph.perform(ir); 402 403 if (VERIFY) { 404 ir.verify(getName(), true); 405 } 406 407 if (DEBUG) { 408 VM.sysWriteln("after an optimization pass"); 409 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString()); 410 SSA.printInstructions(ir); 411 VM.sysWriteln("Finish optimize: " + ir.getMethod().toString()); 412 } 413 } 414 // No longer in use 415 loopRegisterSet = null; 416 } 417 418 if (!inSSAphase) { 419 // Leave SSA 420 leaveSSA.perform(ir); 421 } 422 423 if (VERIFY) { 424 ir.verify(getName(), true); 425 } 426 } 427 428 // -oO Optimisation routines Oo- 429 430 /** 431 * Find an outermost loop to optimise and optimise it. Focus on 432 * annotated regular loops, LICM should handle possible 433 * optimisation for the non-regular loops 434 * 435 * @param loop Loop to search 436 * @return was optimisation performed 437 */ 438 private boolean findLoopToOptimise(AnnotatedLSTNode loop) { 439 // Has this loop already been optimised? 440 Operand carriedLoopIterator = loop.getCarriedLoopIterator(); 441 if ((carriedLoopIterator instanceof RegisterOperand) && 442 (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) { 443 return false; 444 } 445 446 // Process inner loops first 447 Enumeration<GraphNode> innerLoops = loop.outNodes(); 448 // Iterate over loops 449 while (innerLoops.hasMoreElements()) { 450 AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement(); 451 // Try to optimise inner loops first 452 if (findLoopToOptimise(nestedLoop)) { 453 // Exit early if inner loop optimisation succeeded 454 return true; 455 } 456 } 457 // Don't try to optimise irregular loops 458 if (loop.isNonRegularLoop()) { 459 return false; 460 } 461 if (DEBUG) { 462 report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod()); 463 VM.sysWriteln("dominator tree:"); 464 VM.sysWriteln(ir.HIRInfo.dominatorTree.toString()); 465 } 466 467 // 1) Determine the bound and null checks to be eliminated. The 468 // bound checks are the ones that operate on the loop iterator. If 469 // no checks can be eliminated, stop optimising this loop. 470 ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>(); 471 getListOfChecksToEliminate(loop, checksToEliminate); 472 if (checksToEliminate.isEmpty()) { 473 return false; 474 } else { 475 // We found instructions to eliminate 476 if (DEBUG) { 477 VM.sysWriteln("Loop being optimised:"); 478 VM.sysWriteln(loop.toString()); 479 VM.sysWriteln("Checks to eliminate:"); 480 for (Instruction instruction : checksToEliminate) { 481 VM.sysWriteln(instruction.toString()); 482 } 483 } 484 // 2) Determine the registers defined in the loop. 485 ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>(); 486 ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>(); 487 ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>(); 488 getRegistersDefinedInLoop(loop, 489 registersDefinedInOriginalLoop, 490 typesOfRegistersDefinedInOriginalLoop, 491 definingInstructionsInOriginalLoop); 492 if (DEBUG) { 493 VM.sysWrite("Registers in original loop:\n{"); 494 for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) { 495 VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString()); 496 if (definingInstructionsInOriginalLoop.get(i) != null) { 497 VM.sysWrite("(escapes),"); 498 } else { 499 VM.sysWrite(","); 500 } 501 } 502 VM.sysWriteln("}"); 503 } 504 // 3) Generate phi nodes that define the original register 505 // defined by the loop and use two newly created registers. 506 ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>(); 507 HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>(); 508 HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>(); 509 generatePhiNodes(loop, 510 registersDefinedInOriginalLoop, 511 typesOfRegistersDefinedInOriginalLoop, 512 phiInstructions, 513 subOptimalRegMap, 514 optimalRegMap); 515 if (DEBUG) { 516 VM.sysWriteln("subOptimalRegMap"); 517 VM.sysWriteln(subOptimalRegMap.toString()); 518 VM.sysWriteln("optimalRegMap"); 519 VM.sysWriteln(optimalRegMap.toString()); 520 } 521 522 // 4) Create a version of the original loop that uses the first of 523 // the newly created registers instead of the original 524 // registers. 525 HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>(); 526 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap = 527 createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap); 528 if (DEBUG) { 529 VM.sysWriteln("subOptimalLoopMap"); 530 VM.sysWriteln(unoptimizedLoopMap.toString()); 531 } 532 // 5) Create a second version, this time with the result of the 533 // eliminated checks set to explicit test guards. 534 HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>(); 535 HashMap<BasicBlock, BasicBlock> optimizedLoopMap = 536 createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap); 537 if (DEBUG) { 538 VM.sysWriteln("optimalLoopMap"); 539 VM.sysWriteln(optimizedLoopMap.toString()); 540 } 541 // 6) Work out what the maximum value for all the bounds checks 542 // are and create branches to optimal or suboptimal loops - with 543 // the unoptimized loop possibly being unreachable 544 BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 545 BasicBlock temp = (BasicBlock) loop.header.prev; 546 ir.cfg.breakCodeOrder(temp, loop.header); 547 ir.cfg.linkInCodeOrder(temp, firstBranchBlock); 548 ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header); 549 temp.redirectOuts(loop.header, firstBranchBlock, ir); 550 boolean isUnoptimizedLoopReachable = 551 createBranchBlocks(loop, 552 firstBranchBlock, 553 checksToEliminate, 554 unoptimizedLoopMap.get(loop.predecessor), 555 optimizedLoopMap.get(loop.predecessor), 556 optimalRegMap); 557 // 7) Fix up the phi node predecessors 558 fixUpPhiPredecessors(phiInstructions, 559 isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null, 560 optimizedLoopMap.get(loop.exit)); 561 // 8) Remove the unoptimized loop if its redundant 562 if (!isUnoptimizedLoopReachable) { 563 removeUnoptimizedLoop(loop, unoptimizedLoopMap); 564 } 565 566 // 9) Replace register definitions in the original 567 // loop with phi instructions 568 modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap); 569 // 10) Compact node numbering so that CFG number of nodes 570 // reflects that some basic blocks may have been deleted 571 ir.cfg.compactNodeNumbering(); 572 return true; 573 } 574 } 575 576 /** 577 * Create a list of instructions to be eliminated 578 * @param loop the loop to examine 579 * @param instrToEliminate the instructions to remove 580 */ 581 private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) { 582 ArrayList<Instruction> nullChecks = new ArrayList<Instruction>(); 583 ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>(); 584 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 585 while (blocks.hasMoreElements()) { 586 BasicBlock block = blocks.nextElement(); 587 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 588 while (instructions.hasMoreElements()) { 589 Instruction instruction = instructions.nextElement(); 590 if (NullCheck.conforms(instruction)) { 591 if (loop.isInvariant(NullCheck.getRef(instruction))) { 592 instrToEliminate.add(instruction); 593 nullChecks.add(instruction); 594 } 595 } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) { 596 if (loop.isInvariant(BoundsCheck.getRef(instruction))) { 597 if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) { 598 if (loop.isInvariant(BoundsCheck.getGuard(instruction))) { 599 instrToEliminate.add(instruction); 600 } else { 601 // Null check isn't invariant but reference was, check 602 // null check will be eliminated at end of loop 603 oddBoundChecks.add(instruction); 604 } 605 } 606 } 607 } 608 } 609 } 610 // Check cases where the null check isn't loop invariant, however, 611 // it will be in the optimized loop as we'll have eliminated it 612 for (Instruction oddBoundCheck : oddBoundChecks) { 613 Operand guard = BoundsCheck.getGuard(oddBoundCheck); 614 for (Instruction nullCheck : nullChecks) { 615 if (guard.similar(NullCheck.getGuardResult(nullCheck))) { 616 instrToEliminate.add(oddBoundCheck); 617 break; 618 } 619 } 620 } 621 } 622 623 /** 624 * Get registers defined in the given loop. As we're in SSA form 625 * all register definitions must be unique. 626 * @param loop - the loop to examine 627 * @param registers - vector to which defined registers are added 628 */ 629 private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers, 630 ArrayList<TypeReference> types, 631 ArrayList<Instruction> definingInstructions) { 632 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 633 while (blocks.hasMoreElements()) { 634 BasicBlock block = blocks.nextElement(); 635 // can value escape 636 final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit)); 637 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 638 while (instructions.hasMoreElements()) { 639 Instruction instruction = instructions.nextElement(); 640 Enumeration<Operand> operands = instruction.getDefs(); 641 while (operands.hasMoreElements()) { 642 Operand operand = operands.nextElement(); 643 if (operand.isRegister()) { 644 registers.add(operand.asRegister().getRegister()); 645 types.add(operand.asRegister().getType()); 646 if (escapes) { 647 definingInstructions.add(instruction); 648 } else { 649 definingInstructions.add(null); 650 } 651 } 652 } 653 } 654 } 655 } 656 657 /** 658 * Generate into a new block phi nodes that define the original 659 * register defined by the loop and use two newly created 660 * registers. 661 * @param registers - vector to which defined registers need to be 662 * created registers.x used in creating phi nodes 663 * @param types - vector of corresponding types of registers. 664 * @param phiInstructions - created phi instructions 665 * @param subOptimalRegMap - mapping of orignal destination to the 666 * newly created destination for the unoptimized loop 667 * @param optimalRegMap - mapping of orignal destination to the 668 * newly created destination for the optimized loop 669 */ 670 private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers, 671 ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions, 672 HashMap<Register, Register> subOptimalRegMap, 673 HashMap<Register, Register> optimalRegMap) { 674 // Get the carried loop iterator's register 675 Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister(); 676 for (int i = 0; i < registers.size(); i++) { 677 Register register = registers.get(i); 678 TypeReference type = types.get(i); 679 Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2); 680 phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 681 682 // new operand for optimized loop 683 Operand op0 = ir.regpool.makeTemp(type); 684 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0); 685 optimalRegMap.put(register, op0.asRegister().getRegister()); 686 687 // new operand for unoptimized loop 688 Operand op1 = ir.regpool.makeTemp(type); 689 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1); 690 subOptimalRegMap.put(register, op1.asRegister().getRegister()); 691 692 // Add the new created carried loop iterator registers to 693 // internal set to mark the optimized loops 694 if (register == carriedLoopIteratorRegister) { 695 setOptimizedLoop(op0.asRegister().getRegister()); 696 setOptimizedLoop(op1.asRegister().getRegister()); 697 } 698 699 phiInstructions.add(phi); 700 } 701 // rename any optimized inner loops registers 702 renameOptimizedLoops(subOptimalRegMap, optimalRegMap); 703 } 704 705 /** 706 * Create a clone of the loop replacing definitions in the cloned 707 * loop with those found in the register map 708 * @param loop - loop to clone 709 * @param regMap - mapping of original definition to new 710 * definition 711 * @return a mapping from original BBs to created BBs 712 */ 713 private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop, 714 HashMap<Register, Register> regMap, 715 HashMap<Register, BasicBlock> regToBlockMap) { 716 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>(); 717 // After the newly created loop goto the old loop header 718 originalToCloneBBMap.put(loop.successor, loop.header); 719 // Create an empty block to be the loop predecessor 720 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 721 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred); 722 originalToCloneBBMap.put(loop.predecessor, new_pred); 723 // Create copy blocks 724 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 725 while (blocks.hasMoreElements()) { 726 BasicBlock block = blocks.nextElement(); 727 block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts 728 // Create copy and register mapping 729 BasicBlock copy = block.copyWithoutLinks(ir); 730 originalToCloneBBMap.put(block, copy); 731 // Link into code order 732 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy); 733 // Alter register definitions and uses in copy 734 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 735 while (instructions.hasMoreElements()) { 736 Instruction instruction = instructions.nextElement(); 737 Enumeration<Operand> operands = instruction.getDefs(); 738 while (operands.hasMoreElements()) { 739 Operand operand = operands.nextElement(); 740 if (operand.isRegister()) { 741 Register register = operand.asRegister().getRegister(); 742 if (regMap.containsKey(register)) { 743 instruction.replaceRegister(register, regMap.get(register)); 744 regToBlockMap.put(regMap.get(register), copy); 745 } 746 } 747 } 748 operands = instruction.getUses(); 749 while (operands.hasMoreElements()) { 750 Operand operand = operands.nextElement(); 751 if (operand instanceof RegisterOperand) { 752 Register register = operand.asRegister().getRegister(); 753 if (regMap.containsKey(register)) { 754 instruction.replaceRegister(register, regMap.get(register)); 755 } 756 } 757 } 758 } 759 } 760 // Fix up outs 761 // loop predecessor 762 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir); 763 // loop blocks 764 blocks = loop.getBasicBlocks(); 765 while (blocks.hasMoreElements()) { 766 BasicBlock block = blocks.nextElement(); 767 BasicBlock copy = originalToCloneBBMap.get(block); 768 Enumeration<BasicBlock> outs = block.getOutNodes(); 769 while (outs.hasMoreElements()) { 770 BasicBlock out = outs.nextElement(); 771 if (originalToCloneBBMap.containsKey(out)) { 772 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir); 773 } 774 } 775 } 776 // Fix up phis 777 blocks = loop.getBasicBlocks(); 778 while (blocks.hasMoreElements()) { 779 BasicBlock block = blocks.nextElement(); 780 BasicBlock copy = originalToCloneBBMap.get(block); 781 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 782 while (instructions.hasMoreElements()) { 783 Instruction instruction = instructions.nextElement(); 784 if (Phi.conforms(instruction)) { 785 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) { 786 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block; 787 if (originalToCloneBBMap.containsKey(phi_predecessor)) { 788 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor))); 789 } else { 790 dumpIR(ir, "Error when optimising" + ir.getMethod()); 791 throw new Error("There's > 1 route to this phi node " + 792 instruction + 793 " from outside the loop: " + 794 phi_predecessor); 795 } 796 } 797 } 798 } 799 } 800 return originalToCloneBBMap; 801 } 802 803 /** 804 * Create a clone of the loop replacing definitions in the cloned 805 * loop with those found in the register map and eliminate 806 * unnecessary bound checks 807 * @param loop - loop to clone 808 * @param regMap - mapping of original definition to new 809 * definition 810 * @param instrToEliminate - instructions to eliminate 811 * @param regToBlockMap - mapping of a register to its defining BB 812 * @return a mapping from original BBs to created BBs 813 */ 814 private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop, 815 HashMap<Register, Register> regMap, 816 ArrayList<Instruction> instrToEliminate, 817 HashMap<Register, BasicBlock> regToBlockMap) { 818 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>(); 819 // After the newly created loop goto the old loop header 820 originalToCloneBBMap.put(loop.successor, loop.header); 821 // Create an empty block to be the loop predecessor 822 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 823 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred); 824 originalToCloneBBMap.put(loop.predecessor, new_pred); 825 826 // Create copy blocks 827 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 828 while (blocks.hasMoreElements()) { 829 BasicBlock block = blocks.nextElement(); 830 // N.B. fall through will have been killed by unoptimized loop 831 // Create copy and register mapping 832 BasicBlock copy = block.copyWithoutLinks(ir); 833 originalToCloneBBMap.put(block, copy); 834 // Link into code order 835 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy); 836 837 // Alter register definitions in copy 838 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 839 loop_over_created_instructions: 840 while (instructions.hasMoreElements()) { 841 Instruction instruction = instructions.nextElement(); 842 if (BoundsCheck.conforms(instruction)) { 843 for (Instruction anInstrToEliminate : instrToEliminate) { 844 if (instruction.similar(anInstrToEliminate)) { 845 instruction.remove(); 846 continue loop_over_created_instructions; 847 } 848 } 849 } else if (NullCheck.conforms(instruction)) { 850 for (Instruction anInstrToEliminate : instrToEliminate) { 851 if (instruction.similar(anInstrToEliminate)) { 852 instruction.remove(); 853 continue loop_over_created_instructions; 854 } 855 } 856 } 857 Enumeration<Operand> operands = instruction.getDefs(); 858 while (operands.hasMoreElements()) { 859 Operand operand = operands.nextElement(); 860 if (operand instanceof RegisterOperand) { 861 Register register = operand.asRegister().getRegister(); 862 if (regMap.containsKey(register)) { 863 instruction.replaceRegister(register, regMap.get(register)); 864 regToBlockMap.put(regMap.get(register), copy); 865 } 866 } 867 } 868 operands = instruction.getUses(); 869 while (operands.hasMoreElements()) { 870 Operand operand = operands.nextElement(); 871 if (operand.isRegister()) { 872 Register register = operand.asRegister().getRegister(); 873 if (regMap.containsKey(register)) { 874 instruction.replaceRegister(register, regMap.get(register)); 875 } 876 } 877 } 878 } 879 } 880 // Fix up outs 881 // loop predecessor 882 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir); 883 blocks = loop.getBasicBlocks(); 884 while (blocks.hasMoreElements()) { 885 BasicBlock block = blocks.nextElement(); 886 BasicBlock copy = originalToCloneBBMap.get(block); 887 Enumeration<BasicBlock> outs = block.getOutNodes(); 888 while (outs.hasMoreElements()) { 889 BasicBlock out = outs.nextElement(); 890 if (originalToCloneBBMap.containsKey(out)) { 891 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir); 892 } 893 } 894 } 895 // Fix up phis 896 blocks = loop.getBasicBlocks(); 897 while (blocks.hasMoreElements()) { 898 BasicBlock block = blocks.nextElement(); 899 BasicBlock copy = originalToCloneBBMap.get(block); 900 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 901 while (instructions.hasMoreElements()) { 902 Instruction instruction = instructions.nextElement(); 903 if (Phi.conforms(instruction)) { 904 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) { 905 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block; 906 if (originalToCloneBBMap.containsKey(phi_predecessor)) { 907 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor))); 908 } else { 909 throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor); 910 } 911 } 912 } 913 } 914 } 915 return originalToCloneBBMap; 916 } 917 918 /** 919 * When phi nodes were generated the basic blocks weren't known for 920 * the predecessors, fix this up now. (It may also not be possible 921 * to reach the unoptimized loop any more) 922 */ 923 private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit, 924 BasicBlock optimizedLoopExit) { 925 if (unoptimizedLoopExit != null) { 926 for (Instruction instruction : phiInstructions) { 927 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit)); 928 Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit)); 929 } 930 } else { 931 for (Instruction instruction : phiInstructions) { 932 Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND); 933 Phi.resizeNumberOfPreds(instruction, 1); 934 Phi.resizeNumberOfValues(instruction, 1); 935 Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand); 936 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit)); 937 } 938 } 939 } 940 941 /** 942 * Create the block containing explict branches to either the 943 * optimized or unoptimized loops 944 * @param optimalRegMap - mapping used to map eliminated bound and 945 * null check guards to 946 */ 947 private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block, 948 ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry, 949 BasicBlock optimizedLoopEntry, 950 HashMap<Register, Register> optimalRegMap) { 951 BasicBlock blockOnEntry = block; 952 // 1) generate null check guards 953 block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry); 954 955 // 2) generate bound check guards 956 if (loop.isMonotonic()) { 957 // create new operands for values beyond initial and terminal iterator values 958 Operand terminal; 959 Operand terminalLessStrideOnce; 960 Operand terminalPlusStrideOnce; 961 962 // NB. precomputing these makes life easier and the code easier to read, 963 // it does create dead code though 964 if (loop.terminalIteratorValue.isIntConstant()) { 965 terminal = loop.terminalIteratorValue; 966 int terminalAsInt = terminal.asIntConstant().value; 967 int stride = loop.strideValue.asIntConstant().value; 968 terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride); 969 terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride); 970 } else { 971 Instruction tempInstr; 972 terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue); 973 terminalLessStrideOnce = ir.regpool.makeTempInt(); 974 terminalPlusStrideOnce = ir.regpool.makeTempInt(); 975 tempInstr = 976 Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy()); 977 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 978 block.appendInstruction(tempInstr); 979 DefUse.updateDUForNewInstruction(tempInstr); 980 981 tempInstr = 982 Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy()); 983 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 984 block.appendInstruction(tempInstr); 985 DefUse.updateDUForNewInstruction(tempInstr); 986 } 987 988 // Determine maximum and minimum index values for different loop types 989 Operand phiMinIndexValue; 990 Operand phiMaxIndexValue; 991 992 if (loop.isMonotonicIncreasing()) { 993 phiMinIndexValue = loop.initialIteratorValue; 994 if ((loop.condition.isLESS() || 995 loop.condition.isLOWER() || 996 loop.condition.isNOT_EQUAL())) { 997 phiMaxIndexValue = terminal; 998 } else if ((loop.condition.isLESS_EQUAL() || 999 loop.condition.isLOWER_EQUAL() || 1000 loop.condition.isEQUAL())) { 1001 phiMaxIndexValue = terminalPlusStrideOnce; 1002 } else { 1003 throw new Error("Unrecognised loop for fission " + loop); 1004 } 1005 } else if (loop.isMonotonicDecreasing()) { 1006 phiMaxIndexValue = loop.initialIteratorValue; 1007 if ((loop.condition.isGREATER() || 1008 loop.condition.isHIGHER() || 1009 loop.condition.isNOT_EQUAL())) { 1010 phiMinIndexValue = terminalPlusStrideOnce; 1011 } else if ((loop.condition.isGREATER_EQUAL() || 1012 loop.condition.isHIGHER_EQUAL() || 1013 loop.condition.isEQUAL())) { 1014 phiMinIndexValue = terminalLessStrideOnce; 1015 } else { 1016 throw new Error("Unrecognised loop for fission " + loop); 1017 } 1018 } else { 1019 throw new Error("Unrecognised loop for fission " + loop); 1020 } 1021 // Generate tests 1022 for (int i = 0; i < checksToEliminate.size(); i++) { 1023 Instruction instr = checksToEliminate.get(i); 1024 if (BoundsCheck.conforms(instr)) { 1025 // Have we already generated these tests? 1026 boolean alreadyChecked = false; 1027 for (int j = 0; j < i; j++) { 1028 Instruction old_instr = checksToEliminate.get(j); 1029 if (BoundsCheck.conforms(old_instr) && 1030 (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) && 1031 (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) { 1032 // yes - just create a guard move 1033 alreadyChecked = true; 1034 RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO(); 1035 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1036 RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO(); 1037 guardSource.setRegister(optimalRegMap.get(guardSource.getRegister())); 1038 Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource); 1039 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1040 block.appendInstruction(tempInstr); 1041 break; 1042 } 1043 } 1044 if (!alreadyChecked) { 1045 // no - generate tests 1046 Operand index = BoundsCheck.getIndex(instr); 1047 int distance = loop.getFixedDistanceFromPhiIterator(index); 1048 if (distance == 0) { 1049 block = 1050 generateExplicitBoundCheck(instr, 1051 phiMinIndexValue, 1052 phiMaxIndexValue, 1053 optimalRegMap, 1054 block, 1055 unoptimizedLoopEntry); 1056 } else { 1057 Instruction tempInstr; 1058 RegisterOperand minIndex = ir.regpool.makeTempInt(); 1059 RegisterOperand maxIndex = ir.regpool.makeTempInt(); 1060 1061 tempInstr = 1062 Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance)); 1063 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1064 block.appendInstruction(tempInstr); 1065 DefUse.updateDUForNewInstruction(tempInstr); 1066 1067 tempInstr = 1068 Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance)); 1069 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1070 block.appendInstruction(tempInstr); 1071 DefUse.updateDUForNewInstruction(tempInstr); 1072 1073 block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry); 1074 } 1075 } 1076 } 1077 } 1078 } 1079 // Have we had to create a new basic block since entry => we 1080 // generated a branch to the unoptimized loop 1081 boolean isUnoptimizedLoopReachable = (blockOnEntry != block); 1082 // 3) Finish up with goto and generate true guard value 1083 { 1084 Instruction branch; // the generated branch instruction 1085 branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget()); 1086 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1087 block.appendInstruction(branch); 1088 block.deleteNormalOut(); 1089 block.insertOut(optimizedLoopEntry); 1090 } 1091 return isUnoptimizedLoopReachable; 1092 } 1093 1094 /** 1095 * Generate null check branch blocks 1096 * 1097 * @param loop the current loop where checks are being eliminated 1098 * @param checksToEliminate all of the checks that are being eliminated in the pass 1099 * @param optimalRegMap a map from original register to the register used in the optimal loop 1100 * @param block the block to generate code into 1101 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails 1102 * @return the new block to generate code into 1103 */ 1104 private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop, 1105 ArrayList<Instruction> checksToEliminate, 1106 HashMap<Register, Register> optimalRegMap, 1107 BasicBlock block, BasicBlock unoptimizedLoopEntry) { 1108 // Map of already generated null check references to their 1109 // corresponding guard result 1110 HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>(); 1111 // Iterate over checks 1112 for (Instruction instr : checksToEliminate) { 1113 // Is this a null check 1114 if (NullCheck.conforms(instr)) { 1115 // the generated branch instruction 1116 Instruction branch; 1117 // the reference to compare 1118 Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr)); 1119 // the guard result to define 1120 RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO(); 1121 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1122 // check if we've generated this test already 1123 if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) { 1124 // yes - generate just a guard move 1125 branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy()); 1126 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1127 block.appendInstruction(branch); 1128 } else { 1129 // check if we can just move a guard from the loop predecessors 1130 RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr); 1131 if (guard != null) { 1132 // yes - generate just a guard move 1133 branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO()); 1134 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1135 block.appendInstruction(branch); 1136 } else { 1137 // generate explicit null test 1138 branch = 1139 IfCmp.create(REF_IFCMP, 1140 guardResult, 1141 ref.copy(), 1142 new NullConstantOperand(), 1143 ConditionOperand.EQUAL(), 1144 unoptimizedLoopEntry.makeJumpTarget(), 1145 BranchProfileOperand.unlikely()); 1146 if (ref.isRegister()) { 1147 refToGuardMap.put(ref.asRegister().getRegister(), guardResult); 1148 } 1149 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1150 block.appendInstruction(branch); 1151 // Adjust block 1152 block.insertOut(unoptimizedLoopEntry); 1153 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1154 BasicBlock temp = (BasicBlock) block.next; 1155 ir.cfg.breakCodeOrder(block, temp); 1156 ir.cfg.linkInCodeOrder(block, new_block); 1157 ir.cfg.linkInCodeOrder(new_block, temp); 1158 block.insertOut(new_block); 1159 block = new_block; 1160 } 1161 } 1162 } 1163 } 1164 return block; 1165 } 1166 1167 /** 1168 * Generate bound check branch blocks 1169 * 1170 * @param boundCheckInstr the bound check instruction in question 1171 * @param minIndexValue the min value for an iterator a loop will generate 1172 * @param maxIndexValue the max value for an iterator a loop will generate 1173 * @param optimalRegMap a map from original register to the register used in the optimal loop 1174 * @param block the block to generate code into 1175 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails 1176 * @return the new block to generate code into 1177 */ 1178 private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue, 1179 Operand maxIndexValue, 1180 HashMap<Register, Register> optimalRegMap, 1181 BasicBlock block, BasicBlock unoptimizedLoopEntry) { 1182 // 1) Work out what tests are necessary. NB we don't optimise for 1183 // the case when exceptions will always be generated 1184 boolean lowerBoundTestRedundant; 1185 boolean upperBoundTestRedundant; 1186 { 1187 // as array lengths must be >= 0 the lower bound test is not 1188 // necessary if: 1189 // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant) 1190 lowerBoundTestRedundant = 1191 ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) || 1192 ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) && 1193 (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0))); 1194 // as the upper bound must be <= arraylength the test is not 1195 // necessary if: 1196 // maxIndexValue = (arraylength A) - zeroOrPositiveConstant 1197 Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue); 1198 upperBoundTestRedundant = 1199 ((maxIndexArrayLengthRef != null) && 1200 maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) && 1201 (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0)); 1202 } 1203 1204 // 2) Create explicit bound check 1205 1206 // register to hold result (NB it's a guard for the optimal loop) 1207 RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO(); 1208 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1209 1210 // the guard on the bound check (mapped from the optimal loop as 1211 // it should already have been generated or may already be out of 1212 // the loop) 1213 Operand origGuard = BoundsCheck.getGuard(boundCheckInstr); 1214 Operand guard = origGuard.copy(); 1215 if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) { 1216 guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister())); 1217 } 1218 1219 if (lowerBoundTestRedundant && upperBoundTestRedundant) { 1220 // both tests redundant so just generate a guard move of the 1221 // bound check guard 1222 Instruction move = Move.create(GUARD_MOVE, guardResult, guard); 1223 move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1224 block.appendInstruction(move); 1225 } else { 1226 // 2.1) Create array length 1227 RegisterOperand array_length = ir.regpool.makeTempInt(); 1228 Instruction array_length_instr = 1229 GuardedUnary.create(ARRAYLENGTH, 1230 array_length, 1231 AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(), 1232 guard); 1233 array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1234 block.appendInstruction(array_length_instr); 1235 1236 // 2.2) Create minimum index test unless test is redundant 1237 if (!lowerBoundTestRedundant) { 1238 RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation(); 1239 // Generate bound check 1240 Instruction branch = 1241 IfCmp.create(INT_IFCMP, 1242 lowerBoundGuard, 1243 minIndexValue.copy(), 1244 array_length.copyRO(), 1245 ConditionOperand.LESS(), 1246 unoptimizedLoopEntry.makeJumpTarget(), 1247 BranchProfileOperand.unlikely()); 1248 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1249 block.appendInstruction(branch); 1250 // Adjust block 1251 block.insertOut(unoptimizedLoopEntry); 1252 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1253 BasicBlock temp = (BasicBlock) block.next; 1254 ir.cfg.breakCodeOrder(block, temp); 1255 ir.cfg.linkInCodeOrder(block, new_block); 1256 ir.cfg.linkInCodeOrder(new_block, temp); 1257 block.insertOut(new_block); 1258 block = new_block; 1259 } 1260 // 2.3) Create maximum index test 1261 if (!upperBoundTestRedundant) { 1262 Instruction branch = 1263 IfCmp.create(INT_IFCMP, 1264 guardResult, 1265 maxIndexValue.copy(), 1266 array_length.copyRO(), 1267 ConditionOperand.GREATER(), 1268 unoptimizedLoopEntry.makeJumpTarget(), 1269 BranchProfileOperand.unlikely()); 1270 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1271 block.appendInstruction(branch); 1272 // Adjust block 1273 block.insertOut(unoptimizedLoopEntry); 1274 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1275 BasicBlock temp = (BasicBlock) block.next; 1276 ir.cfg.breakCodeOrder(block, temp); 1277 ir.cfg.linkInCodeOrder(block, new_block); 1278 ir.cfg.linkInCodeOrder(new_block, temp); 1279 block.insertOut(new_block); 1280 block = new_block; 1281 } 1282 } 1283 return block; 1284 } 1285 1286 /** 1287 * Can we eliminate a null check as it has lready been performed? 1288 * NB SSA guarantees that if a value is null it must always be null 1289 * 1290 * @param instr null check instruction 1291 */ 1292 private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) { 1293 if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr)); 1294 BasicBlock block = header; 1295 do { 1296 block = ir.HIRInfo.dominatorTree.getParent(block); 1297 Instruction lastInst = block.lastInstruction(); 1298 for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst = 1299 itrInst.nextInstructionInCodeOrder()) { 1300 if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) { 1301 return NullCheck.getGuardResult(itrInst); 1302 } 1303 } 1304 } while (block != ir.cfg.entry()); 1305 return null; 1306 } 1307 1308 /** 1309 * Get the array length reference ignoring instructions that adjust 1310 * its result by a fixed amount 1311 * 1312 * @param op operand to chase arraylength opcode to 1313 * constant value from an array length 1314 */ 1315 private Operand getConstantAdjustedArrayLengthRef(Operand op) { 1316 Operand result = null; 1317 if (op.isRegister()) { 1318 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op); 1319 if (opInstr.operator.opcode == ARRAYLENGTH_opcode) { 1320 result = GuardedUnary.getVal(opInstr); 1321 } else if ((opInstr.operator.opcode == INT_ADD_opcode) || (opInstr.operator.opcode == INT_SUB_opcode)) { 1322 Operand val1 = Binary.getVal1(opInstr); 1323 Operand val2 = Binary.getVal2(opInstr); 1324 if (val1.isConstant()) { 1325 result = getConstantAdjustedArrayLengthRef(val2); 1326 } else if (val2.isConstant()) { 1327 result = getConstantAdjustedArrayLengthRef(val1); 1328 } 1329 } 1330 } 1331 return result; 1332 } 1333 1334 /** 1335 * Get the distance from an array length by addding up instructions 1336 * that adjust the array length result by a constant amount 1337 * 1338 * @param op operand to chase arraylength opcode to 1339 */ 1340 private int getConstantAdjustedArrayLengthDistance(Operand op) { 1341 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op); 1342 if (opInstr.operator.opcode == ARRAYLENGTH_opcode) { 1343 return 0; 1344 } else if (opInstr.operator.opcode == INT_ADD_opcode) { 1345 Operand val1 = Binary.getVal1(opInstr); 1346 Operand val2 = Binary.getVal2(opInstr); 1347 if (val1.isConstant()) { 1348 return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2); 1349 } else { 1350 if (VM.VerifyAssertions) VM._assert(val2.isConstant()); 1351 return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value; 1352 } 1353 } else if (opInstr.operator.opcode == INT_SUB_opcode) { 1354 Operand val1 = Binary.getVal1(opInstr); 1355 Operand val2 = Binary.getVal2(opInstr); 1356 if (val1.isConstant()) { 1357 return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2); 1358 } else { 1359 if (VM.VerifyAssertions) VM._assert(val2.isConstant()); 1360 return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value; 1361 } 1362 } else { 1363 throw new Error("Unexpected opcode when computing distance " + op); 1364 } 1365 } 1366 1367 /** 1368 * Remove loop and replace register definitions in the original loop 1369 * with phi instructions 1370 */ 1371 private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions, 1372 ArrayList<Instruction> definingInstrInOriginalLoop, 1373 HashMap<Register, Register> subOptimalRegMap, 1374 HashMap<Register, Register> optimalRegMap) { 1375 // Remove instructions from loop header and exit, remove other 1376 // loop body blocks 1377 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 1378 while (blocks.hasMoreElements()) { 1379 BasicBlock block = blocks.nextElement(); 1380 if ((block == loop.header) || (block == loop.exit)) { 1381 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 1382 while (instructions.hasMoreElements()) { 1383 Instruction instruction = instructions.nextElement(); 1384 if (!BBend.conforms(instruction) && !Label.conforms(instruction)) { 1385 instruction.remove(); 1386 } 1387 } 1388 } else { 1389 ir.cfg.removeFromCFGAndCodeOrder(block); 1390 } 1391 } 1392 1393 // Place phi instructions in loop header 1394 for (int i = 0; i < phiInstructions.size(); i++) { 1395 Instruction origInstr = definingInstrInOriginalLoop.get(i); 1396 // Did the original instructions value escape the loop? 1397 if (origInstr != null) { 1398 // Was this a phi of a phi? 1399 if (Phi.conforms(origInstr)) { 1400 Instruction phi = phiInstructions.get(i); 1401 boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2; 1402 // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header 1403 boolean fixed = false; 1404 for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) { 1405 BasicBlockOperand predOp = Phi.getPred(origInstr, index); 1406 // Only worry about values who are on the backward branch 1407 if (predOp.block == loop.exit) { 1408 if (fixed) { // We've tried to do 2 replaces => something wrong 1409 SSA.printInstructions(ir); 1410 OptimizingCompilerException.UNREACHABLE("LoopVersioning", 1411 "Phi node in loop header with multiple in loop predecessors"); 1412 } 1413 Operand rval = Phi.getValue(origInstr, index); 1414 if (rval.isRegister()) { 1415 // Sort out registers 1416 Register origRegPhiRval = rval.asRegister().getRegister(); 1417 Register subOptRegPhiRval; 1418 Register optRegPhiRval; 1419 if (!subOptimalRegMap.containsKey(origRegPhiRval)) { 1420 // Register comes from loop exit but it wasn't defined in the loop 1421 subOptRegPhiRval = origRegPhiRval; 1422 optRegPhiRval = origRegPhiRval; 1423 } else { 1424 subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval); 1425 optRegPhiRval = optimalRegMap.get(origRegPhiRval); 1426 } 1427 if (phiHasUnoptimizedArg) { 1428 Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval); 1429 } 1430 Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval); 1431 } else if (rval.isConstant()) { 1432 // Sort out constants 1433 if (phiHasUnoptimizedArg) { 1434 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy()); 1435 } 1436 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy()); 1437 } else if (rval instanceof HeapOperand) { 1438 // Sort out heap variables 1439 @SuppressWarnings("unchecked") // Cast to generic type 1440 HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value; 1441 HeapVariable<Object> subOptPhiRval; 1442 HeapVariable<Object> optPhiRval; 1443 if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) { 1444 // currently we only expect to optimise scalar SSA 1445 // form 1446 subOptPhiRval = origPhiRval; 1447 optPhiRval = origPhiRval; 1448 } else { 1449 /* 1450 subOptPhiRval = (HeapVariable)subOptimalRegMap.get(origPhiRval); 1451 optPhiRval = (HeapVariable)optimalRegMap.get(origPhiRval); 1452 */ 1453 } 1454 if (phiHasUnoptimizedArg) { 1455 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval)); 1456 } 1457 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval)); 1458 } else { 1459 OptimizingCompilerException.UNREACHABLE("LoopVersioning", 1460 "Unknown operand type", 1461 rval.toString()); 1462 } 1463 fixed = true; 1464 } 1465 } 1466 } 1467 // Add back to loop 1468 loop.header.appendInstruction(phiInstructions.get(i)); 1469 } 1470 } 1471 // Remove original loop and branch to loop successor 1472 Instruction tempInstr; 1473 if (loop.header != loop.exit) { 1474 tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget()); 1475 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1476 loop.header.appendInstruction(tempInstr); 1477 loop.header.deleteNormalOut(); 1478 loop.header.insertOut(loop.exit); 1479 1480 } 1481 tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget()); 1482 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1483 loop.exit.appendInstruction(tempInstr); 1484 loop.exit.deleteNormalOut(); 1485 loop.exit.insertOut(loop.successor); 1486 } 1487 1488 /** 1489 * Remove unreachable unoptimized loop 1490 */ 1491 private void removeUnoptimizedLoop(AnnotatedLSTNode loop, 1492 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) { 1493 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 1494 report("removing unoptimized loop"); 1495 BasicBlock block = unoptimizedLoopMap.get(loop.predecessor); 1496 report("removing block " + block); 1497 ir.cfg.removeFromCFGAndCodeOrder(block); 1498 while (blocks.hasMoreElements()) { 1499 block = unoptimizedLoopMap.get(blocks.nextElement()); 1500 if (!loop.contains(block)) { 1501 report("removing block " + block); 1502 ir.cfg.removeFromCFGAndCodeOrder(block); 1503 } else { 1504 report("not removing block that's in the original loop" + block); 1505 } 1506 } 1507 } 1508 1509 /** 1510 * Put the optimized loop's iterator register into the hash set 1511 * 1512 * @param reg register 1513 */ 1514 private void setOptimizedLoop(Register reg) { 1515 loopRegisterSet.add(reg); 1516 } 1517 1518 /** 1519 * Check whether the loop that contain such iterator register had 1520 * been optimized 1521 * 1522 * @param reg register 1523 * @return the test result 1524 */ 1525 private boolean isOptimizedLoop(Register reg) { 1526 return loopRegisterSet.contains(reg); 1527 } 1528 1529 /** 1530 * Rename the iterators for optimized loops so we can tell they are still optimized 1531 */ 1532 private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap, 1533 HashMap<Register, Register> optimalRegMap) { 1534 Iterator<Register> itr = loopRegisterSet.iterator(); 1535 while (itr.hasNext()) { 1536 Register reg = itr.next(); 1537 if (subOptimalRegMap.containsKey(reg)) { 1538 loopRegisterSet.remove(reg); 1539 loopRegisterSet.add(subOptimalRegMap.get(reg)); 1540 loopRegisterSet.add(optimalRegMap.get(reg)); 1541 itr = loopRegisterSet.iterator(); 1542 } 1543 } 1544 } 1545 }