org.jikesrvm.compilers.opt.ssa
Class LoopVersioning

java.lang.Object
  extended by org.jikesrvm.compilers.opt.driver.CompilerPhase
      extended by org.jikesrvm.compilers.opt.ssa.LoopVersioning

public final class LoopVersioning
extends CompilerPhase

This optimisation works from the outer most loop inward, optimising loops that conform to being regular AnnotatedLSTNodes. The optimisations performs the following operations:

Example:

for (int t1=0; t1 < 100; t1++) { g1 = null_check l0 g2 = bounds_check l0, t1 g3 = guard_combine g1,g2 t2 = aload l0, t1, g3 g4 = null_check l1 g5 = bounds_check l1, t1 g6 = guard_combine g4,g5 astore t2, l1, t1, g6 } becomes: goto explicit_test_block successor_to_loops: g1 = phi g1_1, g1_2 g2 = phi g2_1, g2_2 g3 = phi g3_1, g3_2 t2 = phi t2_1, t2_2 g4 = phi g4_1, g4_2 g5 = phi g5_1, g5_2 g6 = phi g6_1, g6_2 goto after_loop explicit_test_block: if l0 == null (unlikely) goto sub_optimal_loop if 100 >= l0.length (unlikely) goto sub_optimal_loop if l1 == null (unlikely) goto sub_optimal_loop if 100 >= l1.length (unlikely) goto sub_optimal_loop goto optimal_loop sub_optimal_loop: for (int t1_1=0; t1_1 < 100; t1_1++) { g1_1 = null_check l0 g2_1 = bounds_check l0, t1_1 g3_1 = guard_combine g1_1,g2_1 t2_1 = aload l0, t1_1, g3_1 g4_1 = null_check l1 g5_1 = bounds_check l1, t1_1 g6_1 = guard_combine g4_1,g5_1 astore t2_1, l1, t1_1, g6_1 } goto successor_to_loops optimal_loop: for (int t1_2=0; t1_2 < 100; t1_2++) { g1_2 = true_guard g2_2 = true_guard g3_2 = guard_combine g1_2,g2_2 t2_2 = aload l0, t1_2, g3_2 g4_2 = null_check l1 g5_2 = bounds_check l1, t1_2 g6_2 = guard_combine g4_2,g5_2 astore t2_2, l1, t1_2, g6_2 } goto successor_to_loops after_loop: The optimisation works on the Heap SSA form. A more accurate example of the transformation would be: heap1 = ...; // previous heap state t1_1 = 0; if t1_1 ≥ 100 goto label2 label1: t1_2 = phi t1_1, t1_3 heap2 = phi heap1, heap3 g1 = null_check l0 g2 = bounds_check l0, t1_2 g3 = guard_combine g1,g2 t2 = aload l0, t1_2, g3 g4 = null_check l1 g5 = bounds_check l1, t1_2 g6 = guard_combine g4,g5 heap3 = astore t2, l1, t1_2, g6 t1_3 = t1_2 + 1 if t1_3 < 100 label1 * label2: becomes: heap1 = ...; // previous heap state t1_1 = 0; if t1_1 ≥ 100 goto label2 goto explicit_test_block successor_to_loops: t1_2 = phi t1_2_1, t1_2_2 heap2 = phi heap2_1, heap2_2 g1 = phi g1_1, g1_2 g2 = phi g2_1, g2_2 g3 = phi g3_1, g3_2 t2 = phi t2_1, t2_2 g4 = phi g4_1, g4_2 g5 = phi g5_1, g5_2 g6 = phi g6_1, g6_2 heap3 = phi heap3_1, heap3_2 t1_3 = phi t1_3_1, t1_3_2 goto after_loop explicit_test_block: g1_2 = if l0 == null (unlikely) goto sub_optimal_loop g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop g4_2 = if l1 == null (unlikely) goto sub_optimal_loop g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop goto optimal_loop sub_optimal_loop: label1_1: t1_2_1 = phi t1_1, t1_3_1 heap2_1 = phi heap1, heap3_1 g1_1 = null_check l0 g2_1 = bounds_check l0, t1_2_1 g3_1 = guard_combine g1_1,g2_1 t2_1 = aload l0, t1_2_1, g3_1 g4_1 = null_check l1 g5_1 = bounds_check l1, t1_2_1 g6_1 = guard_combine g4_1,g5_1 heap3_1 = astore t2_1, l1, t1_2_1, g6_1 t1_3_1 = t1_2_1 + 1 if t1_3_1 < 100 label1_1 goto successor_to_loops optimal_loop: label1_2: t1_2_2 = phi t1_1, t1_3_2 heap2_2 = phi heap1, heap3_2 g3_2 = guard_combine g1_2,g2_2 t2_2 = aload l0, t1_2_2, g3_2 g6_2 = guard_combine g4_2,g5_2 heap3_2 = astore t2_2, l1, t1_2_2, g6_2 t1_3_2 = t1_2_2 + 1 if t1_3_2 < 100 label1_2 goto successor_to_loops after_loop: label2:


Field Summary
private static Constructor<CompilerPhase> constructor
          Constructor for this compiler phase
private static boolean DEBUG
          Flag to optionally print verbose debugging messages
private  SSAOptions desiredSSAOptions
          SSA options
private  CompilerPhase domPhase
          Compiler phases called from this one
private  CompilerPhase enterSSA
          Compiler phases called from this one
private static boolean inSSAphase
          Run inside SSA sub-phase
private  IR ir
          IR for optimisation
private  CompilerPhase leaveSSA
          Compiler phases called from this one
private  HashSet<Register> loopRegisterSet
          Set used to store the loop related register
private static int OPTIMIZED_LOOP_OPERAND
          The phi instruction operand holding the optimized loop variable
private static int UNOPTIMIZED_LOOP_OPERAND
          The phi instruction operand holding the unoptimized loop variable
private static boolean VERIFY
          Flag to verify computed IR
 
Fields inherited from class org.jikesrvm.compilers.opt.driver.CompilerPhase
container
 
Constructor Summary
LoopVersioning()
          Constructor
 
Method Summary
private  boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block, ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry, BasicBlock optimizedLoopEntry, HashMap<Register,Register> optimalRegMap)
          Create the block containing explict branches to either the optimized or unoptimized loops
private  HashMap<BasicBlock,BasicBlock> createCloneLoop(AnnotatedLSTNode loop, HashMap<Register,Register> regMap, HashMap<Register,BasicBlock> regToBlockMap)
          Create a clone of the loop replacing definitions in the cloned loop with those found in the register map
private  HashMap<BasicBlock,BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop, HashMap<Register,Register> regMap, ArrayList<Instruction> instrToEliminate, HashMap<Register,BasicBlock> regToBlockMap)
          Create a clone of the loop replacing definitions in the cloned loop with those found in the register map and eliminate unnecessary bound checks
private  boolean findLoopToOptimise(AnnotatedLSTNode loop)
          Find an outermost loop to optimise and optimise it.
private  void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit, BasicBlock optimizedLoopExit)
          When phi nodes were generated the basic blocks weren't known for the predecessors, fix this up now.
private  BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue, Operand maxIndexValue, HashMap<Register,Register> optimalRegMap, BasicBlock block, BasicBlock unoptimizedLoopEntry)
          Generate bound check branch blocks
private  BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop, ArrayList<Instruction> checksToEliminate, HashMap<Register,Register> optimalRegMap, BasicBlock block, BasicBlock unoptimizedLoopEntry)
          Generate null check branch blocks
private  void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers, ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions, HashMap<Register,Register> subOptimalRegMap, HashMap<Register,Register> optimalRegMap)
          Generate into a new block phi nodes that define the original register defined by the loop and use two newly created registers.
 Constructor<CompilerPhase> getClassConstructor()
          Get a constructor object for this compiler phase
private  int getConstantAdjustedArrayLengthDistance(Operand op)
          Get the distance from an array length by addding up instructions that adjust the array length result by a constant amount
private  Operand getConstantAdjustedArrayLengthRef(Operand op)
          Get the array length reference ignoring instructions that adjust its result by a fixed amount
private  void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate)
          Create a list of instructions to be eliminated
 String getName()
          Return a string name for this phase.
private  void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers, ArrayList<TypeReference> types, ArrayList<Instruction> definingInstructions)
          Get registers defined in the given loop.
private  boolean isOptimizedLoop(Register reg)
          Check whether the loop that contain such iterator register had been optimized
private  void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions, ArrayList<Instruction> definingInstrInOriginalLoop, HashMap<Register,Register> subOptimalRegMap, HashMap<Register,Register> optimalRegMap)
          Remove loop and replace register definitions in the original loop with phi instructions
private  RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr)
          Can we eliminate a null check as it has lready been performed?
 void perform(IR _ir)
          This is the method that actually does the work of the phase.
private  void removeUnoptimizedLoop(AnnotatedLSTNode loop, HashMap<BasicBlock,BasicBlock> unoptimizedLoopMap)
          Remove unreachable unoptimized loop
private  void renameOptimizedLoops(HashMap<Register,Register> subOptimalRegMap, HashMap<Register,Register> optimalRegMap)
          Rename the iterators for optimized loops so we can tell they are still optimized
private static void report(String s)
          Human readable report of what goes on
private  void setOptimizedLoop(Register reg)
          Put the optimized loop's iterator register into the hash set
 boolean shouldPerform(OptOptions options)
          Should loop versioning be performed?
 
Methods inherited from class org.jikesrvm.compilers.opt.driver.CompilerPhase
dumpIR, dumpIR, getCompilerPhaseConstructor, getCompilerPhaseConstructor, newExecution, performPhase, printingEnabled, reportAdditionalStats, setContainer, verify
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DEBUG

private static final boolean DEBUG
Flag to optionally print verbose debugging messages

See Also:
Constant Field Values

VERIFY

private static final boolean VERIFY
Flag to verify computed IR

See Also:
Constant Field Values

OPTIMIZED_LOOP_OPERAND

private static final int OPTIMIZED_LOOP_OPERAND
The phi instruction operand holding the optimized loop variable

See Also:
Constant Field Values

UNOPTIMIZED_LOOP_OPERAND

private static final int UNOPTIMIZED_LOOP_OPERAND
The phi instruction operand holding the unoptimized loop variable

See Also:
Constant Field Values

ir

private IR ir
IR for optimisation


loopRegisterSet

private HashSet<Register> loopRegisterSet
Set used to store the loop related register


desiredSSAOptions

private SSAOptions desiredSSAOptions
SSA options


enterSSA

private CompilerPhase enterSSA
Compiler phases called from this one


leaveSSA

private CompilerPhase leaveSSA
Compiler phases called from this one


domPhase

private CompilerPhase domPhase
Compiler phases called from this one


inSSAphase

private static final boolean inSSAphase
Run inside SSA sub-phase

See Also:
Constant Field Values

constructor

private static final Constructor<CompilerPhase> constructor
Constructor for this compiler phase

Constructor Detail

LoopVersioning

public LoopVersioning()
Constructor

Method Detail

report

private static void report(String s)
Human readable report of what goes on

Parameters:
s - String to print

getName

public String getName()
Return a string name for this phase.

Specified by:
getName in class CompilerPhase
Returns:
"Loop Versioning"

getClassConstructor

public Constructor<CompilerPhase> getClassConstructor()
Get a constructor object for this compiler phase

Overrides:
getClassConstructor in class CompilerPhase
Returns:
compiler phase constructor

shouldPerform

public boolean shouldPerform(OptOptions options)
Should loop versioning be performed?

Overrides:
shouldPerform in class CompilerPhase
Parameters:
options - the compiler options for the compilation
Returns:
true if the phase should be performed

perform

public void perform(IR _ir)
Description copied from class: CompilerPhase
This is the method that actually does the work of the phase.

Specified by:
perform in class CompilerPhase
Parameters:
_ir - the IR to process

findLoopToOptimise

private boolean findLoopToOptimise(AnnotatedLSTNode loop)
Find an outermost loop to optimise and optimise it. Focus on annotated regular loops, LICM should handle possible optimisation for the non-regular loops

Parameters:
loop - Loop to search
Returns:
was optimisation performed

getListOfChecksToEliminate

private void getListOfChecksToEliminate(AnnotatedLSTNode loop,
                                        ArrayList<Instruction> instrToEliminate)
Create a list of instructions to be eliminated

Parameters:
loop - the loop to examine
instrToEliminate - the instructions to remove

getRegistersDefinedInLoop

private void getRegistersDefinedInLoop(AnnotatedLSTNode loop,
                                       ArrayList<Register> registers,
                                       ArrayList<TypeReference> types,
                                       ArrayList<Instruction> definingInstructions)
Get registers defined in the given loop. As we're in SSA form all register definitions must be unique.

Parameters:
loop - - the loop to examine
registers - - vector to which defined registers are added

generatePhiNodes

private void generatePhiNodes(AnnotatedLSTNode loop,
                              ArrayList<Register> registers,
                              ArrayList<TypeReference> types,
                              ArrayList<Instruction> phiInstructions,
                              HashMap<Register,Register> subOptimalRegMap,
                              HashMap<Register,Register> optimalRegMap)
Generate into a new block phi nodes that define the original register defined by the loop and use two newly created registers.

Parameters:
registers - - vector to which defined registers need to be created registers.x used in creating phi nodes
types - - vector of corresponding types of registers.
phiInstructions - - created phi instructions
subOptimalRegMap - - mapping of orignal destination to the newly created destination for the unoptimized loop
optimalRegMap - - mapping of orignal destination to the newly created destination for the optimized loop

createCloneLoop

private HashMap<BasicBlock,BasicBlock> createCloneLoop(AnnotatedLSTNode loop,
                                                       HashMap<Register,Register> regMap,
                                                       HashMap<Register,BasicBlock> regToBlockMap)
Create a clone of the loop replacing definitions in the cloned loop with those found in the register map

Parameters:
loop - - loop to clone
regMap - - mapping of original definition to new definition
Returns:
a mapping from original BBs to created BBs

createOptimizedLoop

private HashMap<BasicBlock,BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop,
                                                           HashMap<Register,Register> regMap,
                                                           ArrayList<Instruction> instrToEliminate,
                                                           HashMap<Register,BasicBlock> regToBlockMap)
Create a clone of the loop replacing definitions in the cloned loop with those found in the register map and eliminate unnecessary bound checks

Parameters:
loop - - loop to clone
regMap - - mapping of original definition to new definition
instrToEliminate - - instructions to eliminate
regToBlockMap - - mapping of a register to its defining BB
Returns:
a mapping from original BBs to created BBs

fixUpPhiPredecessors

private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions,
                                  BasicBlock unoptimizedLoopExit,
                                  BasicBlock optimizedLoopExit)
When phi nodes were generated the basic blocks weren't known for the predecessors, fix this up now. (It may also not be possible to reach the unoptimized loop any more)


createBranchBlocks

private boolean createBranchBlocks(AnnotatedLSTNode loop,
                                   BasicBlock block,
                                   ArrayList<Instruction> checksToEliminate,
                                   BasicBlock unoptimizedLoopEntry,
                                   BasicBlock optimizedLoopEntry,
                                   HashMap<Register,Register> optimalRegMap)
Create the block containing explict branches to either the optimized or unoptimized loops

Parameters:
optimalRegMap - - mapping used to map eliminated bound and null check guards to

generateNullCheckBranchBlocks

private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop,
                                                 ArrayList<Instruction> checksToEliminate,
                                                 HashMap<Register,Register> optimalRegMap,
                                                 BasicBlock block,
                                                 BasicBlock unoptimizedLoopEntry)
Generate null check branch blocks

Parameters:
loop - the current loop where checks are being eliminated
checksToEliminate - all of the checks that are being eliminated in the pass
optimalRegMap - a map from original register to the register used in the optimal loop
block - the block to generate code into
unoptimizedLoopEntry - entry to the unoptimized loop for if the check fails
Returns:
the new block to generate code into

generateExplicitBoundCheck

private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr,
                                              Operand minIndexValue,
                                              Operand maxIndexValue,
                                              HashMap<Register,Register> optimalRegMap,
                                              BasicBlock block,
                                              BasicBlock unoptimizedLoopEntry)
Generate bound check branch blocks

Parameters:
boundCheckInstr - the bound check instruction in question
minIndexValue - the min value for an iterator a loop will generate
maxIndexValue - the max value for an iterator a loop will generate
optimalRegMap - a map from original register to the register used in the optimal loop
block - the block to generate code into
unoptimizedLoopEntry - entry to the unoptimized loop for if the check fails
Returns:
the new block to generate code into

nullCheckPerformedInLoopPredecessors

private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header,
                                                             Instruction instr)
Can we eliminate a null check as it has lready been performed? NB SSA guarantees that if a value is null it must always be null

Parameters:
instr - null check instruction

getConstantAdjustedArrayLengthRef

private Operand getConstantAdjustedArrayLengthRef(Operand op)
Get the array length reference ignoring instructions that adjust its result by a fixed amount

Parameters:
op - operand to chase arraylength opcode to constant value from an array length

getConstantAdjustedArrayLengthDistance

private int getConstantAdjustedArrayLengthDistance(Operand op)
Get the distance from an array length by addding up instructions that adjust the array length result by a constant amount

Parameters:
op - operand to chase arraylength opcode to

modifyOriginalLoop

private void modifyOriginalLoop(AnnotatedLSTNode loop,
                                ArrayList<Instruction> phiInstructions,
                                ArrayList<Instruction> definingInstrInOriginalLoop,
                                HashMap<Register,Register> subOptimalRegMap,
                                HashMap<Register,Register> optimalRegMap)
Remove loop and replace register definitions in the original loop with phi instructions


removeUnoptimizedLoop

private void removeUnoptimizedLoop(AnnotatedLSTNode loop,
                                   HashMap<BasicBlock,BasicBlock> unoptimizedLoopMap)
Remove unreachable unoptimized loop


setOptimizedLoop

private void setOptimizedLoop(Register reg)
Put the optimized loop's iterator register into the hash set

Parameters:
reg - register

isOptimizedLoop

private boolean isOptimizedLoop(Register reg)
Check whether the loop that contain such iterator register had been optimized

Parameters:
reg - register
Returns:
the test result

renameOptimizedLoops

private void renameOptimizedLoops(HashMap<Register,Register> subOptimalRegMap,
                                  HashMap<Register,Register> optimalRegMap)
Rename the iterators for optimized loops so we can tell they are still optimized