001    /*
002     *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003     *
004     *  This file is licensed to You under the Eclipse Public License (EPL);
005     *  You may not use this file except in compliance with the License. You
006     *  may obtain a copy of the License at
007     *
008     *      http://www.opensource.org/licenses/eclipse-1.0.php
009     *
010     *  See the COPYRIGHT.txt file distributed with this work for information
011     *  regarding copyright ownership.
012     */
013    package org.jikesrvm.compilers.opt.controlflow;
014    
015    import static org.jikesrvm.compilers.opt.ir.Operators.GOTO;
016    
017    import java.util.Enumeration;
018    import java.util.HashMap;
019    import java.util.LinkedHashMap;
020    import java.util.LinkedHashSet;
021    import java.util.TreeSet;
022    
023    import org.jikesrvm.VM;
024    import org.jikesrvm.compilers.opt.OptOptions;
025    import org.jikesrvm.compilers.opt.driver.CompilerPhase;
026    import org.jikesrvm.compilers.opt.ir.BasicBlock;
027    import org.jikesrvm.compilers.opt.ir.Goto;
028    import org.jikesrvm.compilers.opt.ir.IR;
029    import org.jikesrvm.compilers.opt.ir.Instruction;
030    import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
031    import org.jikesrvm.compilers.opt.ir.operand.BranchOperand;
032    
033    /**
034     * Reorder code layout of basic blocks for improved I-cache locality and
035     * branch prediction. This code assumes that basic block frequencies have
036     * been computed and blocks have been marked infrequent.
037     * This pass actually implements two code placement algorithms:
038     * <ul>
039     *  <li>(1) A simple 'fluff' removal pass that moves all infrequent basic blocks
040     *     to the end of the code order.
041     *  <li>(2) Pettis and Hansen Algo2.
042     * </ul>
043     */
044    public final class ReorderingPhase extends CompilerPhase {
045    
046      private static final boolean DEBUG = false;
047    
048      /**
049       * Return this instance of this phase. This phase contains no
050       * per-compilation instance fields.
051       * @param ir not used
052       * @return this
053       */
054      @Override
055      public CompilerPhase newExecution(IR ir) {
056        return this;
057      }
058    
059      @Override
060      public boolean shouldPerform(OptOptions options) {
061        return options.REORDER_CODE;
062      }
063    
064      @Override
065      public boolean printingEnabled(OptOptions options, boolean before) {
066        return DEBUG;
067      }
068    
069      @Override
070      public String getName() {
071        return "Code Reordering";
072      }
073    
074      /**
075       * Reorder basic blocks either by trivially moving infrequent blocks
076       * to the end of the code order or by applying Pettis and Hansen Algo2.
077       *
078       * We will rearrange basic blocks and insert/remove
079       * unconditional GOTO's if needed.  It does not clean up branches,
080       * by reversing the branch condition, however.  That is saved for
081       * BranchOptimizations.
082       */
083      @Override
084      public void perform(IR ir) {
085        ir.cfg.entry().clearInfrequent();
086        if (ir.options.REORDER_CODE_PH) {
087          // Do Pettis and Hansen PLDI'90 Algo2
088          doPettisHansenAlgo2(ir);
089        } else {
090          // Simple algorithm: just move infrequent code to the end
091          exileInfrequentBlocks(ir);
092        }
093      }
094    
095      /////////////////////////
096      // Code for trivial algorithm
097      /////////////////////////
098    
099      /**
100       * Select a new basic block ordering via a simple heuristic
101       * that moves all infrequent basic blocks to the end.
102       * @param ir the IR object to reorder
103       */
104      private void exileInfrequentBlocks(IR ir) {
105        // (1) Look to see if there are infrequent blocks
106        //     Also count how many blocks there are.
107        int numBlocks = 0;
108        boolean foundSome = false;
109        for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
110          BasicBlock bb = e.nextElement();
111          numBlocks++;
112          foundSome |= bb.getInfrequent();
113        }
114        if (!foundSome) return; // Nothing to do
115    
116        // Reorder the blocks to exile the infrequent blocks.
117        // Relative order within the set of frequent and infrequent is unchanged.
118        BasicBlock[] newOrdering = new BasicBlock[numBlocks];
119        int idx = 0;
120        // First append frequent blocks to newOrdering
121        for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
122          if (!bb.getInfrequent()) {
123            newOrdering[idx++] = bb;
124          }
125        }
126        // Next append infrequent blocks to newOrdering
127        for (BasicBlock bb = ir.cfg.firstInCodeOrder(); bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
128          if (bb.getInfrequent()) {
129            newOrdering[idx++] = bb;
130          }
131        }
132    
133        if (VM.VerifyAssertions) VM._assert(idx == numBlocks); // don't lose blocks!
134        if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == newOrdering[0]);
135    
136        // Add/remove unconditional goto's as needed.
137        for (int i = 0; i < newOrdering.length; i++) {
138          Instruction lastInstr = newOrdering[i].lastRealInstruction();
139          // Append a GOTO if needed to maintain old fallthrough semantics.
140          BasicBlock fallthroughBlock = newOrdering[i].getFallThroughBlock();
141          if (fallthroughBlock != null) {
142            if (i == newOrdering.length - 1 || fallthroughBlock != newOrdering[i + 1]) {
143              // Add unconditional goto to preserve old fallthrough semantics
144              newOrdering[i].appendInstruction(fallthroughBlock.makeGOTO());
145            }
146          }
147          // Remove last instruction if it is a redundant GOTO that
148          // can be implemented by a fallthrough edge in the new ordering.
149          // (Only possible if newOrdering[i] is not the last basic block.)
150          if (i < newOrdering.length - 1 && lastInstr != null && lastInstr.operator() == GOTO) {
151            BranchOperand op = Goto.getTarget(lastInstr);
152            if (op.target.getBasicBlock() == newOrdering[i + 1]) {
153              // unconditional goto is redundant in new ordering
154              lastInstr.remove();
155            }
156          }
157        }
158    
159        // Re-insert all basic blocks according to new ordering
160        ir.cfg.clearCodeOrder();
161        for (BasicBlock bb : newOrdering) {
162          ir.cfg.addLastInCodeOrder(bb);
163        }
164      }
165    
166      /////////////////////////
167      // Code for P&H Algo2
168      /////////////////////////
169    
170      /**
171       * Reorder code using Algo2 (Bottom-Up Positioning) from
172       * Pettis and Hansen PLDI'90.
173       * @param ir the IR to reorder.
174       */
175      private void doPettisHansenAlgo2(IR ir) {
176        // (1) Setup:
177        //     (a) Count the blocks
178        //     (b) Create a sorted set of CFG edges
179        //     (c) Create a set of blocks
180        //     (d) Make fallthroughs explict by adding GOTOs
181        int numBlocks = 0;
182        TreeSet<Edge> edges = new TreeSet<Edge>();
183        LinkedHashSet<BasicBlock> chainHeads = new LinkedHashSet<BasicBlock>();
184        BasicBlock entry = ir.cfg.entry();
185        if (VM.VerifyAssertions) VM._assert(ir.cfg.entry() == ir.cfg.firstInCodeOrder());
186    
187        for (BasicBlock bb = entry; bb != null; bb = bb.nextBasicBlockInCodeOrder()) {
188          numBlocks++;
189          chainHeads.add(bb);
190          bb.scratchObject = bb;
191          BasicBlock ft = bb.getFallThroughBlock();
192          if (ft != null) {
193            bb.appendInstruction(Goto.create(GOTO, ft.makeJumpTarget()));
194          }
195          float bw = bb.getExecutionFrequency();
196          for (WeightedBranchTargets wbt = new WeightedBranchTargets(bb); wbt.hasMoreElements(); wbt.advance()) {
197            edges.add(new Edge(bb, wbt.curBlock(), wbt.curWeight() * bw));
198          }
199        }
200    
201        if (DEBUG) VM.sysWriteln("Edges = " + edges);
202    
203        // (2) Build chains
204        ir.cfg.clearCodeOrder();
205        for (Edge e : edges) {
206          // If the source of the edge is the last block in its chain
207          // and the target of the edge is the first block in its chain
208          // then merge the chains.
209          if (DEBUG) VM.sysWriteln("Processing edge " + e);
210          if (e.target == entry) {
211            if (DEBUG) VM.sysWriteln("\tCan't put entry block in interior of chain");
212            continue;
213          }
214          if (e.source.nextBasicBlockInCodeOrder() != null) {
215            if (DEBUG) VM.sysWriteln("\tSource is not at end of a chain");
216            continue;
217          }
218          if (e.target.prevBasicBlockInCodeOrder() != null) {
219            if (DEBUG) VM.sysWriteln("\tTarget is not at start of a chain");
220            continue;
221          }
222          if (e.source.scratchObject == e.target.scratchObject) {
223            if (DEBUG) VM.sysWriteln("\tSource and target are in same chain");
224            continue;
225          }
226          if (DEBUG) VM.sysWriteln("\tMerging chains");
227          chainHeads.remove(e.target);
228          ir.cfg.linkInCodeOrder(e.source, e.target);
229          // Yuck....we should really use near-linear time union find here
230          // Doing this crappy thing makes us O(N^2) in the worst case.
231          BasicBlock newChain = (BasicBlock) e.source.scratchObject;
232          for (BasicBlock ptr = e.target; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
233            ptr.scratchObject = newChain;
234          }
235        }
236    
237        if (DEBUG) VM.sysWriteln("Chains constructed ");
238        LinkedHashMap<BasicBlock, ChainInfo> chainInfo = new LinkedHashMap<BasicBlock, ChainInfo>();
239        for (BasicBlock head : chainHeads) {
240          if (DEBUG) dumpChain(head);
241          chainInfo.put(head, new ChainInfo(head));
242        }
243    
244        // (3) Summarize inter-chain edges.
245        for (Edge e : edges) {
246          if (e.source.scratchObject != e.target.scratchObject) {
247            Object sourceChain = e.source.scratchObject;
248            Object targetChain = e.target.scratchObject;
249            ChainInfo sourceInfo = chainInfo.get(sourceChain);
250            ChainInfo targetInfo = chainInfo.get(targetChain);
251            if (DEBUG) VM.sysWriteln("Inter-chain edge " + sourceChain + "->" + targetChain + " (" + e.weight + ")");
252            Object value = sourceInfo.outWeights.get(targetInfo);
253            float weight = e.weight;
254            if (value != null) {
255              weight += (Float) value;
256            }
257            sourceInfo.outWeights.put(targetInfo, weight);
258            targetInfo.inWeight += e.weight;
259            if (DEBUG) VM.sysWriteln("\t" + targetInfo + "," + sourceInfo.outWeights.get(targetInfo));
260          }
261        }
262    
263        if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
264    
265        // (4) Construct a total order of the chains, guided by the interchain edge weights.
266        //     Constructing an optimal order is NP-Hard, so we apply the following heuristic.
267        //     The chain that starts with the entry node is placed first.
268        //     At each step, pick the chain with the maximal placedWeight (incoming edges from chains
269        //     that are already placed) and minimal inWeight (incoming edges from chains that are not
270        //     already placed). Prefer a node with non-zero placedWeight and inWeight to one that has
271        //     zeros for both. (A node with both zero placedWeight and zero inWeight is something that
272        //     the profile data predicts is not reachable via normal control flow from the entry node).
273        BasicBlock lastNode = null;
274        ChainInfo nextChoice = chainInfo.get(entry);
275        int numPlaced = 0;
276        ir.cfg.setFirstNode(entry);
277        while (true) {
278          if (DEBUG) VM.sysWriteln("Placing chain " + nextChoice);
279          // Append nextChoice to the previous chain
280          if (lastNode != null) ir.cfg.linkInCodeOrder(lastNode, nextChoice.head);
281          for (BasicBlock ptr = nextChoice.head; ptr != null; ptr = ptr.nextBasicBlockInCodeOrder()) {
282            numPlaced++;
283            lastNode = ptr;
284          }
285          // update ChainInfo
286          chainInfo.remove(nextChoice.head);
287          if (chainInfo.isEmpty()) break; // no chains left to place.
288          for (ChainInfo target : nextChoice.outWeights.keySet()) {
289            if (DEBUG) VM.sysWrite("\toutedge " + target);
290            float weight = (Float) nextChoice.outWeights.get(target);
291            if (DEBUG) VM.sysWriteln(" = " + weight);
292            target.placedWeight += weight;
293            target.inWeight -= weight;
294          }
295    
296          if (DEBUG) VM.sysWriteln("Chain Info " + chainInfo);
297    
298          // Find the next chain to append.
299          nextChoice = null;
300          for (ChainInfo cand : chainInfo.values()) {
301            if (cand.placedWeight > 0f) {
302              if (nextChoice == null) {
303                if (DEBUG) VM.sysWriteln("First reachable candidate " + cand);
304                nextChoice = cand;
305              } else if (cand.inWeight > nextChoice.inWeight ||
306                         (cand.inWeight == nextChoice.inWeight && cand.placedWeight > nextChoice.placedWeight)) {
307                if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
308                nextChoice = cand;
309              }
310            }
311          }
312          if (nextChoice != null) continue;
313    
314          // All remaining chains are fluff (not reachable from entry).
315          // Pick one with minimal inWeight and continue.
316          for (ChainInfo cand : chainInfo.values()) {
317            if (nextChoice == null) {
318              if (DEBUG) VM.sysWriteln("First candidate " + cand);
319              nextChoice = cand;
320            } else if (cand.inWeight < nextChoice.inWeight) {
321              if (DEBUG) VM.sysWriteln(cand + " is a better choice than " + nextChoice);
322              nextChoice = cand;
323            }
324          }
325        }
326    
327        if (VM.VerifyAssertions) VM._assert(numPlaced == numBlocks); // Don't lose blocks!!
328        ir.cfg.setLastNode(lastNode);
329      }
330    
331      private void dumpChain(BasicBlock head) {
332        VM.sysWrite("{" + head);
333        for (BasicBlock next = head.nextBasicBlockInCodeOrder(); next != null; next = next.nextBasicBlockInCodeOrder())
334        {
335          VM.sysWrite(", " + next);
336        }
337        VM.sysWriteln("}");
338      }
339    
340      private static class ChainInfo {
341        final BasicBlock head;
342        float placedWeight;
343        float inWeight;
344        final HashMap<ChainInfo, Object> outWeights = new HashMap<ChainInfo, Object>();
345    
346        ChainInfo(BasicBlock h) {
347          head = h;
348        }
349    
350        @Override
351        public String toString() {
352          return "[" + head + "," + placedWeight + "," + inWeight + "] " + outWeights.size();
353        }
354      }
355    
356      private static final class Edge implements Comparable<Edge> {
357        final BasicBlock source;
358        final BasicBlock target;
359        final float weight;
360    
361        Edge(BasicBlock s, BasicBlock t, float w) {
362          source = s;
363          target = t;
364          weight = w;
365        }
366    
367        @Override
368        public String toString() {
369          return weight + ": " + source.toString() + " -> " + target.toString();
370        }
371    
372        @Override
373        public int compareTo(Edge that) {
374          if (weight < that.weight) {
375            return 1;
376          } else if (weight > that.weight) {
377            return -1;
378          } else {
379            // Equal weights.
380            // Sort based on original code ordering, which is implied by block number
381            if (source.getNumber() < that.source.getNumber()) {
382              return 1;
383            } else if (source.getNumber() > that.source.getNumber()) {
384              return -1;
385            } else {
386              if (target.getNumber() > that.target.getNumber()) {
387                return 1;
388              } else if (source.getNumber() < that.target.getNumber()) {
389                return -1;
390              } else {
391                return 0;
392              }
393            }
394          }
395        }
396      }
397    }