001 /* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013 package org.jikesrvm.compilers.opt.controlflow; 014 015 import static org.jikesrvm.compilers.opt.ir.Operators.GOTO; 016 017 import java.util.Enumeration; 018 import java.util.HashMap; 019 import java.util.LinkedHashMap; 020 import java.util.LinkedHashSet; 021 import java.util.TreeSet; 022 023 import org.jikesrvm.VM; 024 import org.jikesrvm.compilers.opt.OptOptions; 025 import org.jikesrvm.compilers.opt.driver.CompilerPhase; 026 import org.jikesrvm.compilers.opt.ir.BasicBlock; 027 import org.jikesrvm.compilers.opt.ir.Goto; 028 import org.jikesrvm.compilers.opt.ir.IR; 029 import org.jikesrvm.compilers.opt.ir.Instruction; 030 import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets; 031 import org.jikesrvm.compilers.opt.ir.operand.BranchOperand; 032 033 /** 034 * Reorder code layout of basic blocks for improved I-cache locality and 035 * branch prediction. This code assumes that basic block frequencies have 036 * been computed and blocks have been marked infrequent. 037 * This pass actually implements two code placement algorithms: 038 * <ul> 039 * <li>(1) A simple 'fluff' removal pass that moves all infrequent basic blocks 040 * to the end of the code order. 041 * <li>(2) Pettis and Hansen Algo2. 042 * </ul> 043 */ 044 public final class ReorderingPhase extends CompilerPhase { 045 046 private static final boolean DEBUG = false; 047 048 /** 049 * Return this instance of this phase. This phase contains no 050 * per-compilation instance fields. 051 * @param ir not used 052 * @return this 053 */ 054 @Override 055 public CompilerPhase newExecution(IR ir) { 056 return this; 057 } 058 059 @Override 060 public boolean shouldPerform(OptOptions options) { 061 return options.REORDER_CODE; 062 } 063 064 @Override 065 public boolean printingEnabled(OptOptions options, boolean before) { 066 return DEBUG; 067 } 068 069 @Override 070 public String getName() { 071 return "Code Reordering"; 072 } 073 074 /** 075 * Reorder basic blocks either by trivially moving infrequent blocks 076 * to the end of the code order or by applying Pettis and Hansen Algo2. 077 * 078 * We will rearrange basic blocks and insert/remove 079 * unconditional GOTO's if needed. It does not clean up branches, 080 * by reversing the branch condition, however. That is saved for 081 * BranchOptimizations. 082 */ 083 @Override 084 public void perform(IR ir) { 085 ir.cfg.entry().clearInfrequent(); 086 if (ir.options.REORDER_CODE_PH) { 087 // Do Pettis and Hansen PLDI'90 Algo2 088 doPettisHansenAlgo2(ir); 089 } else { 090 // Simple algorithm: just move infrequent code to the end 091 exileInfrequentBlocks(ir); 092 } 093 } 094 095 ///////////////////////// 096 // Code for trivial algorithm 097 ///////////////////////// 098 099 /** 100 * Select a new basic block ordering via a simple heuristic 101 * that moves all infrequent basic blocks to the end. 102 * @param ir the IR object to reorder 103 */ 104 private void exileInfrequentBlocks(IR ir) { 105 // (1) Look to see if there are infrequent blocks 106 // Also count how many blocks there are. 107 int numBlocks = 0; 108 boolean foundSome = false; 109 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 110 BasicBlock bb = e.nextElement(); 111 numBlocks++; 112 foundSome |= bb.getInfrequent(); 113 } 114 if (!foundSome) return; // Nothing to do 115 116 // Reorder the blocks to exile the infrequent blocks. 117 // Relative order within the set of frequent and infrequent is unchanged. 118 BasicBlock[] newOrdering = new BasicBlock[numBlocks]; 119 int idx = 0; 120 // First append frequent blocks to newOrdering 121 for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) { 122 if (!bb.getInfrequent()) { 123 newOrdering[idx++] = bb; 124 } 125 } 126 // Next append infrequent blocks to newOrdering 127 for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) { 128 if (bb.getInfrequent()) { 129 newOrdering[idx++] = bb; 130 } 131 } 132 133 if (VM.VerifyAssertions) VM._assert(idx == numBlocks); // don't lose blocks! 134 if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == newOrdering[0]); 135 136 // Add/remove unconditional goto's as needed. 137 for (int i = 0; i < newOrdering.length; i++) { 138 Instruction lastInstr = newOrdering[i].lastRealInstruction(); 139 // Append a GOTO if needed to maintain old fallthrough semantics. 140 BasicBlock fallthroughBlock = newOrdering[i].getFallThroughBlock(); 141 if (fallthroughBlock != null) { 142 if (i == newOrdering.length - 1 || fallthroughBlock != newOrdering[i + 1]) { 143 // Add unconditional goto to preserve old fallthrough semantics 144 newOrdering[i].appendInstruction(fallthroughBlock.makeGOTO()); 145 } 146 } 147 // Remove last instruction if it is a redundant GOTO that 148 // can be implemented by a fallthrough edge in the new ordering. 149 // (Only possible if newOrdering[i] is not the last basic block.) 150 if (i < newOrdering.length - 1 && lastInstr != null && lastInstr.operator() == GOTO) { 151 BranchOperand op = Goto.getTarget(lastInstr); 152 if (op.target.getBasicBlock() == newOrdering[i + 1]) { 153 // unconditional goto is redundant in new ordering 154 lastInstr.remove(); 155 } 156 } 157 } 158 159 // Re-insert all basic blocks according to new ordering 160 ir.cfg.clearCodeOrder(); 161 for (BasicBlock bb : newOrdering) { 162 ir.cfg.addLastInCodeOrder(bb); 163 } 164 } 165 166 ///////////////////////// 167 // Code for P&H Algo2 168 ///////////////////////// 169 170 /** 171 * Reorder code using Algo2 (Bottom-Up Positioning) from 172 * Pettis and Hansen PLDI'90. 173 * @param ir the IR to reorder. 174 */ 175 private void doPettisHansenAlgo2(IR ir) { 176 // (1) Setup: 177 // (a) Count the blocks 178 // (b) Create a sorted set of CFG edges 179 // (c) Create a set of blocks 180 // (d) Make fallthroughs explict by adding GOTOs 181 int numBlocks = 0; 182 TreeSet<Edge> edges = new TreeSet<Edge>(); 183 LinkedHashSet<BasicBlock> chainHeads = new LinkedHashSet<BasicBlock>(); 184 BasicBlock entry = ir.cfg.entry(); 185 if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == ir.cfg.firstInCodeOrder()); 186 187 for (BasicBlock bb = entry; bb != null; bb = bb.nextBasicBlockInCodeOrder()) { 188 numBlocks++; 189 chainHeads.add(bb); 190 bb.scratchObject = bb; 191 BasicBlock ft = bb.getFallThroughBlock(); 192 if (ft != null) { 193 bb.appendInstruction(Goto.create(GOTO, ft.makeJumpTarget())); 194 } 195 float bw = bb.getExecutionFrequency(); 196 for (WeightedBranchTargets wbt = new WeightedBranchTargets(bb); wbt.hasMoreElements(); wbt.advance()) { 197 edges.add(new Edge(bb, wbt.curBlock(), wbt.curWeight() * bw)); 198 } 199 } 200 201 if (DEBUG) VM.sysWriteln("Edges = " + edges); 202 203 // (2) Build chains 204 ir.cfg.clearCodeOrder(); 205 for (Edge e : edges) { 206 // If the source of the edge is the last block in its chain 207 // and the target of the edge is the first block in its chain 208 // then merge the chains. 209 if (DEBUG) VM.sysWriteln("Processing edge " + e); 210 if (e.target == entry) { 211 if (DEBUG) VM.sysWriteln("\tCan't put entry block in interior of chain"); 212 continue; 213 } 214 if (e.source.nextBasicBlockInCodeOrder() != null) { 215 if (DEBUG) VM.sysWriteln("\tSource is not at end of a chain"); 216 continue; 217 } 218 if (e.target.prevBasicBlockInCodeOrder() != null) { 219 if (DEBUG) VM.sysWriteln("\tTarget is not at start of a chain"); 220 continue; 221 } 222 if (e.source.scratchObject == e.target.scratchObject) { 223 if (DEBUG) VM.sysWriteln("\tSource and target are in same chain"); 224 continue; 225 } 226 if (DEBUG) VM.sysWriteln("\tMerging chains"); 227 chainHeads.remove(e.target); 228 ir.cfg.linkInCodeOrder(e.source, e.target); 229 // Yuck....we should really use near-linear time union find here 230 // Doing this crappy thing makes us O(N^2) in the worst case. 231 BasicBlock newChain = (BasicBlock) e.source.scratchObject; 232 for (BasicBlock ptr = e.target; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) { 233 ptr.scratchObject = newChain; 234 } 235 } 236 237 if (DEBUG) VM.sysWriteln("Chains constructed "); 238 LinkedHashMap<BasicBlock, ChainInfo> chainInfo = new LinkedHashMap<BasicBlock, ChainInfo>(); 239 for (BasicBlock head : chainHeads) { 240 if (DEBUG) dumpChain(head); 241 chainInfo.put(head, new ChainInfo(head)); 242 } 243 244 // (3) Summarize inter-chain edges. 245 for (Edge e : edges) { 246 if (e.source.scratchObject != e.target.scratchObject) { 247 Object sourceChain = e.source.scratchObject; 248 Object targetChain = e.target.scratchObject; 249 ChainInfo sourceInfo = chainInfo.get(sourceChain); 250 ChainInfo targetInfo = chainInfo.get(targetChain); 251 if (DEBUG) VM.sysWriteln("Inter-chain edge " + sourceChain + "->" + targetChain + " (" + e.weight + ")"); 252 Object value = sourceInfo.outWeights.get(targetInfo); 253 float weight = e.weight; 254 if (value != null) { 255 weight += (Float) value; 256 } 257 sourceInfo.outWeights.put(targetInfo, weight); 258 targetInfo.inWeight += e.weight; 259 if (DEBUG) VM.sysWriteln("\t" + targetInfo + "," + sourceInfo.outWeights.get(targetInfo)); 260 } 261 } 262 263 if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo); 264 265 // (4) Construct a total order of the chains, guided by the interchain edge weights. 266 // Constructing an optimal order is NP-Hard, so we apply the following heuristic. 267 // The chain that starts with the entry node is placed first. 268 // At each step, pick the chain with the maximal placedWeight (incoming edges from chains 269 // that are already placed) and minimal inWeight (incoming edges from chains that are not 270 // already placed). Prefer a node with non-zero placedWeight and inWeight to one that has 271 // zeros for both. (A node with both zero placedWeight and zero inWeight is something that 272 // the profile data predicts is not reachable via normal control flow from the entry node). 273 BasicBlock lastNode = null; 274 ChainInfo nextChoice = chainInfo.get(entry); 275 int numPlaced = 0; 276 ir.cfg.setFirstNode(entry); 277 while (true) { 278 if (DEBUG) VM.sysWriteln("Placing chain " + nextChoice); 279 // Append nextChoice to the previous chain 280 if (lastNode != null) ir.cfg.linkInCodeOrder(lastNode, nextChoice.head); 281 for (BasicBlock ptr = nextChoice.head; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) { 282 numPlaced++; 283 lastNode = ptr; 284 } 285 // update ChainInfo 286 chainInfo.remove(nextChoice.head); 287 if (chainInfo.isEmpty()) break; // no chains left to place. 288 for (ChainInfo target : nextChoice.outWeights.keySet()) { 289 if (DEBUG) VM.sysWrite("\toutedge " + target); 290 float weight = (Float) nextChoice.outWeights.get(target); 291 if (DEBUG) VM.sysWriteln(" = " + weight); 292 target.placedWeight += weight; 293 target.inWeight -= weight; 294 } 295 296 if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo); 297 298 // Find the next chain to append. 299 nextChoice = null; 300 for (ChainInfo cand : chainInfo.values()) { 301 if (cand.placedWeight > 0f) { 302 if (nextChoice == null) { 303 if (DEBUG) VM.sysWriteln("First reachable candidate " + cand); 304 nextChoice = cand; 305 } else if (cand.inWeight > nextChoice.inWeight || 306 (cand.inWeight == nextChoice.inWeight && cand.placedWeight > nextChoice.placedWeight)) { 307 if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice); 308 nextChoice = cand; 309 } 310 } 311 } 312 if (nextChoice != null) continue; 313 314 // All remaining chains are fluff (not reachable from entry). 315 // Pick one with minimal inWeight and continue. 316 for (ChainInfo cand : chainInfo.values()) { 317 if (nextChoice == null) { 318 if (DEBUG) VM.sysWriteln("First candidate " + cand); 319 nextChoice = cand; 320 } else if (cand.inWeight < nextChoice.inWeight) { 321 if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice); 322 nextChoice = cand; 323 } 324 } 325 } 326 327 if (VM.VerifyAssertions) VM._assert(numPlaced == numBlocks); // Don't lose blocks!! 328 ir.cfg.setLastNode(lastNode); 329 } 330 331 private void dumpChain(BasicBlock head) { 332 VM.sysWrite("{" + head); 333 for (BasicBlock next = head.nextBasicBlockInCodeOrder(); next != null; next = next.nextBasicBlockInCodeOrder()) 334 { 335 VM.sysWrite(", " + next); 336 } 337 VM.sysWriteln("}"); 338 } 339 340 private static class ChainInfo { 341 final BasicBlock head; 342 float placedWeight; 343 float inWeight; 344 final HashMap<ChainInfo, Object> outWeights = new HashMap<ChainInfo, Object>(); 345 346 ChainInfo(BasicBlock h) { 347 head = h; 348 } 349 350 @Override 351 public String toString() { 352 return "[" + head + "," + placedWeight + "," + inWeight + "] " + outWeights.size(); 353 } 354 } 355 356 private static final class Edge implements Comparable<Edge> { 357 final BasicBlock source; 358 final BasicBlock target; 359 final float weight; 360 361 Edge(BasicBlock s, BasicBlock t, float w) { 362 source = s; 363 target = t; 364 weight = w; 365 } 366 367 @Override 368 public String toString() { 369 return weight + ": " + source.toString() + " -> " + target.toString(); 370 } 371 372 @Override 373 public int compareTo(Edge that) { 374 if (weight < that.weight) { 375 return 1; 376 } else if (weight > that.weight) { 377 return -1; 378 } else { 379 // Equal weights. 380 // Sort based on original code ordering, which is implied by block number 381 if (source.getNumber() < that.source.getNumber()) { 382 return 1; 383 } else if (source.getNumber() > that.source.getNumber()) { 384 return -1; 385 } else { 386 if (target.getNumber() > that.target.getNumber()) { 387 return 1; 388 } else if (source.getNumber() < that.target.getNumber()) { 389 return -1; 390 } else { 391 return 0; 392 } 393 } 394 } 395 } 396 } 397 }