001 /* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013 package org.jikesrvm.compilers.opt.controlflow; 014 015 import java.lang.reflect.Constructor; 016 import java.util.Arrays; 017 import java.util.Enumeration; 018 019 import org.jikesrvm.VM; 020 import org.jikesrvm.compilers.opt.OptOptions; 021 import org.jikesrvm.compilers.opt.driver.CompilerPhase; 022 import org.jikesrvm.compilers.opt.ir.BasicBlock; 023 import org.jikesrvm.compilers.opt.ir.IR; 024 import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets; 025 026 /** 027 * Derive relative basic block execution frequencies from branch probabilities.<p> 028 * 029 * This code assumes that the loop structure tree can be constructed for 030 * the CFG in question. This implies that the CFG is reducible. <p> 031 * 032 * The basic algorithm is as follows: 033 * <ul> 034 * <li> Construct the loop structure tree for the CFG. </li> 035 * <li> In a postorder traversal, compute the loop multiplier for each loop. 036 * The loop multiplier is a number such that the execution frequency of 037 * the loop pre-header times the loop multiplier is equal to the 038 * execution frequency of the loop head. This can be derived by computing 039 * the loop exit weight (the probability of exiting the loop) and applying 040 * Kirchoff's law that flow in is equal to flow out. Loop exit weight 041 * can be computed in a single topological (ignoring backedges) traversal 042 * of the nodes in the loop. </li> 043 * <li> Assign the entry node weight 1. In a topological traversal of the CFG 044 * (ignoring backedges), propagate the weights. When processing a loop head, 045 * multiply the incoming weight by the loop multiplier.</li> 046 * </ul> 047 */ 048 public class EstimateBlockFrequencies extends CompilerPhase { 049 050 /** 051 * The IR on which to operate. 052 */ 053 private IR ir; 054 055 /** 056 * The loop structure tree of said IR 057 */ 058 private LSTGraph lst; 059 060 /** 061 * Constructor for this compiler phase 062 */ 063 private static final Constructor<CompilerPhase> constructor = 064 getCompilerPhaseConstructor(EstimateBlockFrequencies.class); 065 066 /** 067 * Get a constructor object for this compiler phase 068 * @return compiler phase constructor 069 */ 070 @Override 071 public Constructor<CompilerPhase> getClassConstructor() { 072 return constructor; 073 } 074 075 /** 076 * Topological ordering (ignoring backedges) of CFG 077 */ 078 private BasicBlock[] topOrder; 079 080 @Override 081 public String getName() { return "Estimate Block Frequencies"; } 082 083 @Override 084 public void reportAdditionalStats() { 085 VM.sysWrite(" "); 086 VM.sysWrite(container.counter1 / container.counter2 * 100, 2); 087 VM.sysWrite("% Infrequent BBs"); 088 } 089 090 /** 091 * Compute relative basic block frequencies for the argument IR based on the 092 * branch probability information on each conditional and multiway branch.<p> 093 * 094 * Assumptions: 095 * <ol> 096 * <li>LST is valid 097 * <li>basic block numbering is dense (compact has been called) 098 * </ol> 099 * 100 * @param _ir the IR on which to apply the phase 101 */ 102 @Override 103 public void perform(IR _ir) { 104 // Prepare 105 ir = _ir; 106 107 if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) { 108 setDumbFrequencies(ir); 109 return; 110 } 111 112 ir.cfg.resetTopSorted(); 113 ir.cfg.buildTopSort(); 114 topOrder = new BasicBlock[ir.cfg.numberOfNodes()]; 115 int idx = 0; 116 for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) { 117 topOrder[idx++] = ptr; 118 ptr.setExecutionFrequency(0f); 119 ptr.clearScratchFlag(); 120 } 121 122 // Get pre-computed LST from IR. 123 lst = ir.HIRInfo.loopStructureTree; 124 125 // Compute loop multipliers 126 if (lst != null) { 127 computeLoopMultipliers(lst.getRoot()); 128 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 129 BasicBlock bb = e.nextElement(); 130 bb.setExecutionFrequency(0f); 131 bb.clearScratchFlag(); 132 } 133 } 134 135 // Compute execution frequency of each basic block 136 computeBlockFrequencies(); 137 138 // Set infrequent bits on basic blocks 139 computeInfrequentBlocks(ir); 140 } 141 142 /** 143 * Set the frequency of each basic block to 1.0f. 144 */ 145 private void setDumbFrequencies(IR ir) { 146 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 147 BasicBlock bb = e.nextElement(); 148 bb.setExecutionFrequency(1f); 149 } 150 } 151 152 /** 153 * Compute which blocks are infrequent.<p> 154 * 155 * Algorithm: 156 * <ol> 157 * <li>let f = INFREQUENT_THRESHOLD. 158 * <li>Start with S = {all basic blocks}. 159 * <li>Sort the blocks by frequency. Starting with the most frequent 160 * blocks, remove blocks from S until the sum of block frequencies in S 161 * <= f. Then blocks in S are infrequent. 162 * </ol> 163 * </pre> 164 * 165 * @param ir the governing IR. 166 */ 167 private void computeInfrequentBlocks(IR ir) { 168 int i = 0; 169 float[] freq = new float[ir.getMaxBasicBlockNumber()]; 170 float total = 0f; 171 // count the total frequency of all blocks 172 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 173 BasicBlock bb = e.nextElement(); 174 freq[i] = bb.getExecutionFrequency(); 175 total += freq[i]; 176 i++; 177 } 178 // sort the frequencies (ascending); 179 Arrays.sort(freq); 180 float f = ir.options.PROFILE_INFREQUENT_THRESHOLD; 181 float goal = (1f - f) * total; 182 total = 0f; 183 float threshold = 0f; 184 // add up the frequencies (descending) until we real the goal. 185 for (i = freq.length - 1; i >= 0 && total < goal; i--) { 186 threshold = freq[i]; 187 total += threshold; 188 } 189 // go back and set infrequent bits. 190 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 191 BasicBlock bb = e.nextElement(); 192 if (bb.getExecutionFrequency() < threshold) { 193 bb.setInfrequent(); 194 container.counter1++; 195 } else { 196 bb.clearInfrequent(); 197 } 198 container.counter2++; 199 } 200 } 201 202 /** 203 * Postorder traversal of LST computing loop multiplier and loop exits 204 * for each loop. 205 */ 206 private void computeLoopMultipliers(LSTNode n) { 207 for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) { 208 computeLoopMultipliers(e.nextElement()); 209 } 210 if (n != lst.getRoot()) { 211 computeMultiplier(n); 212 n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop 213 } 214 } 215 216 /** 217 * Compute the loop multiplier for this loop nest 218 */ 219 private void computeMultiplier(LSTNode n) { 220 n.initializeLoopExits(); 221 computeNodeWeights(n); 222 float loopExitWeight = computeLoopExitWeight(n); 223 n.loopMultiplier = 1.0f / loopExitWeight; 224 } 225 226 /** 227 * Propagate execution frequencies through the loop. 228 * Also records loop exit edges in loopExits. 229 */ 230 private void computeNodeWeights(LSTNode n) { 231 n.header.setExecutionFrequency(1f); 232 int idx = 0; 233 while (topOrder[idx] != n.header) idx++; 234 for (int numNodes = n.loop.populationCount(); numNodes > 0;) { 235 if (idx >= topOrder.length) { 236 numNodes--; 237 continue; 238 } 239 BasicBlock cur = topOrder[idx++]; 240 if (cur == null) { 241 numNodes--; 242 continue; 243 } 244 if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed. 245 LSTNode other = lst.getLoop(cur); 246 if (other != n) { 247 if (cur == other.header) { 248 // loop header of nested loop 249 numNodes -= other.loop.populationCount(); 250 } 251 continue; // skip over nodes in nested loop. 252 } 253 254 numNodes--; 255 cur.setScratchFlag(); 256 float weight = cur.getExecutionFrequency(); 257 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) { 258 processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight); 259 } 260 } 261 } 262 263 private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) { 264 if (target.getScratchFlag()) return; // ignore backedge 265 if (n.loop.get(target.getNumber())) { 266 LSTNode other = lst.getLoop(target); 267 if (other == n) { 268 target.augmentExecutionFrequency(prob * weight); 269 } else { 270 // header of nested loop; pass prob and weight through to targets of loop exits 271 // Algorithm: find the total loopExitWeight, then distribute prob and weight 272 // in ratio to the exit weight for each exit. 273 // Effectively we are treating the nested loop as an n-way branch to its loop exits. 274 target.setScratchFlag(); 275 float exitWeight = computeLoopExitWeight(other); 276 for (LSTNode.Edge exit : other.loopExits) { 277 float myWeight = exit.source.getExecutionFrequency() * exit.probability; 278 float myFraction = myWeight / exitWeight; 279 processEdge(n, source, exit.target, prob * myFraction, weight); 280 } 281 target.clearScratchFlag(); 282 } 283 } else { 284 n.addLoopExit(source, target, prob); 285 } 286 } 287 288 private float computeLoopExitWeight(LSTNode n) { 289 float exitWeight = 0f; 290 for (LSTNode.Edge exit : n.loopExits) { 291 exitWeight += (exit.source.getExecutionFrequency() * exit.probability); 292 } 293 // Kludge: if we think the loop has no exits, lets pretend that there is a 1% 294 // chance of exiting to avoid getting NaN's in our computation. 295 return exitWeight == 0f ? 0.01f : exitWeight; 296 } 297 298 private void computeBlockFrequencies() { 299 ir.cfg.entry().setExecutionFrequency(1f); 300 for (BasicBlock cur : topOrder) { 301 if (cur == null || cur.isExit()) continue; // ignore exit node. 302 if (lst != null) { 303 LSTNode loop = lst.getLoop(cur); 304 if (loop != null && loop.header == cur) { 305 cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier); 306 } 307 } 308 float weight = cur.getExecutionFrequency(); 309 cur.setScratchFlag(); 310 311 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) { 312 BasicBlock target = wbt.curBlock(); 313 if (!target.getScratchFlag()) { 314 target.augmentExecutionFrequency(wbt.curWeight() * weight); 315 } 316 } 317 } 318 } 319 }