001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.controlflow;
014    
015    import java.lang.reflect.Constructor;
016    import java.util.Arrays;
017    import java.util.Enumeration;
018    
019    import org.jikesrvm.VM;
020    import org.jikesrvm.compilers.opt.OptOptions;
021    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
022    import org.jikesrvm.compilers.opt.ir.BasicBlock;
023    import org.jikesrvm.compilers.opt.ir.IR;
024    import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
025    
026    /**
027     * Derive relative basic block execution frequencies from branch probabilities.<p>
028     *
029     * This code assumes that the loop structure tree can be constructed for
030     * the CFG in question.  This implies that the CFG is reducible. <p>
031     *
032     * The basic algorithm is as follows:
033     * <ul>
034     * <li> Construct the loop structure tree for the CFG. </li>
035     * <li> In a postorder traversal, compute the loop multiplier for each loop.
036     *      The loop multiplier is a number such that the execution frequency of
037     *      the loop pre-header times the loop multiplier is equal to the
038     *      execution frequency of the loop head.  This can be derived by computing
039     *      the loop exit weight (the probability of exiting the loop) and applying
040     *      Kirchoff's law that flow in is equal to flow out.  Loop exit weight
041     *      can be computed in a single topological (ignoring backedges) traversal
042     *      of the nodes in the loop. </li>
043     * <li> Assign the entry node weight 1.  In a topological traversal of the CFG
044     *      (ignoring backedges), propagate the weights.  When processing a loop head,
045     *      multiply the incoming weight by the loop multiplier.</li>
046     * </ul>
047     */
048    public class EstimateBlockFrequencies extends CompilerPhase {
049    
050      /**
051       * The IR on which to operate.
052       */
053      private IR ir;
054    
055      /**
056       * The loop structure tree of said IR
057       */
058      private LSTGraph lst;
059    
060      /**
061       * Constructor for this compiler phase
062       */
063      private static final Constructor<CompilerPhase> constructor =
064          getCompilerPhaseConstructor(EstimateBlockFrequencies.class);
065    
066      /**
067       * Get a constructor object for this compiler phase
068       * @return compiler phase constructor
069       */
070      @Override
071      public Constructor<CompilerPhase> getClassConstructor() {
072        return constructor;
073      }
074    
075      /**
076       * Topological ordering (ignoring backedges) of CFG
077       */
078      private BasicBlock[] topOrder;
079    
080      @Override
081      public String getName() { return "Estimate Block Frequencies"; }
082    
083      @Override
084      public void reportAdditionalStats() {
085        VM.sysWrite("  ");
086        VM.sysWrite(container.counter1 / container.counter2 * 100, 2);
087        VM.sysWrite("% Infrequent BBs");
088      }
089    
090      /**
091       * Compute relative basic block frequencies for the argument IR based on the
092       * branch probability information on each conditional and multiway branch.<p>
093       *
094       * Assumptions:
095       * <ol>
096       *   <li>LST is valid
097       *   <li>basic block numbering is dense (compact has been called)
098       * </ol>
099       *
100       * @param _ir the IR on which to apply the phase
101       */
102      @Override
103      public void perform(IR _ir) {
104        // Prepare
105        ir = _ir;
106    
107        if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) {
108          setDumbFrequencies(ir);
109          return;
110        }
111    
112        ir.cfg.resetTopSorted();
113        ir.cfg.buildTopSort();
114        topOrder = new BasicBlock[ir.cfg.numberOfNodes()];
115        int idx = 0;
116        for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) {
117          topOrder[idx++] = ptr;
118          ptr.setExecutionFrequency(0f);
119          ptr.clearScratchFlag();
120        }
121    
122        // Get pre-computed LST from IR.
123        lst = ir.HIRInfo.loopStructureTree;
124    
125        // Compute loop multipliers
126        if (lst != null) {
127          computeLoopMultipliers(lst.getRoot());
128          for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
129            BasicBlock bb = e.nextElement();
130            bb.setExecutionFrequency(0f);
131            bb.clearScratchFlag();
132          }
133        }
134    
135        // Compute execution frequency of each basic block
136        computeBlockFrequencies();
137    
138        // Set infrequent bits on basic blocks
139        computeInfrequentBlocks(ir);
140      }
141    
142      /**
143       * Set the frequency of each basic block to 1.0f.
144       */
145      private void setDumbFrequencies(IR ir) {
146        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
147          BasicBlock bb = e.nextElement();
148          bb.setExecutionFrequency(1f);
149        }
150      }
151    
152      /**
153       * Compute which blocks are infrequent.<p>
154       *
155       * Algorithm:
156       * <ol>
157       *   <li>let f = INFREQUENT_THRESHOLD.
158       *   <li>Start with S = {all basic blocks}.
159       *   <li>Sort the blocks by frequency.  Starting with the most frequent
160       *       blocks, remove blocks from S until the sum of block frequencies in S
161       *       <= f.  Then blocks in S are infrequent.
162       * </ol>
163       * </pre>
164       *
165       * @param ir the governing IR.
166       */
167      private void computeInfrequentBlocks(IR ir) {
168        int i = 0;
169        float[] freq = new float[ir.getMaxBasicBlockNumber()];
170        float total = 0f;
171        // count the total frequency of all blocks
172        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
173          BasicBlock bb = e.nextElement();
174          freq[i] = bb.getExecutionFrequency();
175          total += freq[i];
176          i++;
177        }
178        // sort the frequencies (ascending);
179        Arrays.sort(freq);
180        float f = ir.options.PROFILE_INFREQUENT_THRESHOLD;
181        float goal = (1f - f) * total;
182        total = 0f;
183        float threshold = 0f;
184        // add up the frequencies (descending) until we real the goal.
185        for (i = freq.length - 1; i >= 0 && total < goal; i--) {
186          threshold = freq[i];
187          total += threshold;
188        }
189        // go back and set infrequent bits.
190        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
191          BasicBlock bb = e.nextElement();
192          if (bb.getExecutionFrequency() < threshold) {
193            bb.setInfrequent();
194            container.counter1++;
195          } else {
196            bb.clearInfrequent();
197          }
198          container.counter2++;
199        }
200      }
201    
202      /**
203       * Postorder traversal of LST computing loop multiplier and loop exits
204       * for each loop.
205       */
206      private void computeLoopMultipliers(LSTNode n) {
207        for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) {
208          computeLoopMultipliers(e.nextElement());
209        }
210        if (n != lst.getRoot()) {
211          computeMultiplier(n);
212          n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop
213        }
214      }
215    
216      /**
217       * Compute the loop multiplier for this loop nest
218       */
219      private void computeMultiplier(LSTNode n) {
220        n.initializeLoopExits();
221        computeNodeWeights(n);
222        float loopExitWeight = computeLoopExitWeight(n);
223        n.loopMultiplier = 1.0f / loopExitWeight;
224      }
225    
226      /**
227       * Propagate execution frequencies through the loop.
228       * Also records loop exit edges in loopExits.
229       */
230      private void computeNodeWeights(LSTNode n) {
231        n.header.setExecutionFrequency(1f);
232        int idx = 0;
233        while (topOrder[idx] != n.header) idx++;
234        for (int numNodes = n.loop.populationCount(); numNodes > 0;) {
235          if (idx >= topOrder.length) {
236            numNodes--;
237            continue;
238          }
239          BasicBlock cur = topOrder[idx++];
240          if (cur == null) {
241            numNodes--;
242            continue;
243          }
244          if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed.
245          LSTNode other = lst.getLoop(cur);
246          if (other != n) {
247            if (cur == other.header) {
248              // loop header of nested loop
249              numNodes -= other.loop.populationCount();
250            }
251            continue; // skip over nodes in nested loop.
252          }
253    
254          numNodes--;
255          cur.setScratchFlag();
256          float weight = cur.getExecutionFrequency();
257          for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
258            processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight);
259          }
260        }
261      }
262    
263      private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) {
264        if (target.getScratchFlag()) return; // ignore backedge
265        if (n.loop.get(target.getNumber())) {
266          LSTNode other = lst.getLoop(target);
267          if (other == n) {
268            target.augmentExecutionFrequency(prob * weight);
269          } else {
270            // header of nested loop; pass prob and weight through to targets of loop exits
271            // Algorithm: find the total loopExitWeight, then distribute prob and weight
272            //            in ratio to the exit weight for each exit.
273            //            Effectively we are treating the nested loop as an n-way branch to its loop exits.
274            target.setScratchFlag();
275            float exitWeight = computeLoopExitWeight(other);
276            for (LSTNode.Edge exit : other.loopExits) {
277              float myWeight = exit.source.getExecutionFrequency() * exit.probability;
278              float myFraction = myWeight / exitWeight;
279              processEdge(n, source, exit.target, prob * myFraction, weight);
280            }
281            target.clearScratchFlag();
282          }
283        } else {
284          n.addLoopExit(source, target, prob);
285        }
286      }
287    
288      private float computeLoopExitWeight(LSTNode n) {
289        float exitWeight = 0f;
290        for (LSTNode.Edge exit : n.loopExits) {
291          exitWeight += (exit.source.getExecutionFrequency() * exit.probability);
292        }
293        // Kludge: if we think the loop has no exits, lets pretend that there is a 1%
294        //         chance of exiting to avoid getting NaN's in our computation.
295        return exitWeight == 0f ? 0.01f : exitWeight;
296      }
297    
298      private void computeBlockFrequencies() {
299        ir.cfg.entry().setExecutionFrequency(1f);
300        for (BasicBlock cur : topOrder) {
301          if (cur == null || cur.isExit()) continue; // ignore exit node.
302          if (lst != null) {
303            LSTNode loop = lst.getLoop(cur);
304            if (loop != null && loop.header == cur) {
305              cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier);
306            }
307          }
308          float weight = cur.getExecutionFrequency();
309          cur.setScratchFlag();
310    
311          for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
312            BasicBlock target = wbt.curBlock();
313            if (!target.getScratchFlag()) {
314              target.augmentExecutionFrequency(wbt.curWeight() * weight);
315            }
316          }
317        }
318      }
319    }