Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -163,7 +163,8 @@ AU.addPreserved(); } private: - void BuildRankMap(Function &F); + void BuildRankMap(Function &F, ReversePostOrderTraversal &RPOT); + unsigned getRank(Value *V); void canonicalizeOperands(Instruction *I); void ReassociateExpression(BinaryOperator *I); @@ -246,7 +247,8 @@ return nullptr; } -void Reassociate::BuildRankMap(Function &F) { +void Reassociate::BuildRankMap(Function &F, + ReversePostOrderTraversal &RPOT) { unsigned i = 2; // Assign distinct ranks to function arguments. @@ -255,7 +257,6 @@ DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n"); } - ReversePostOrderTraversal RPOT(&F); for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { BasicBlock *BB = *I; @@ -2259,13 +2260,28 @@ if (skipOptnoneFunction(F)) return false; - // Calculate the rank map for F - BuildRankMap(F); + // Reassociate needs for each instruction to have its operands already + // processed, so we first perform a RPOT of the basic blocks so that + // when we process a basic block, all its dominators have been processed + // before. + ReversePostOrderTraversal RPOT(&F); + BuildRankMap(F, RPOT); MadeChange = false; - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + for (BasicBlock *BI : RPOT) { + // Use a worklist to keep track of which instructions have been processed + // (and which insts won't be optimized again) so when redoing insts, + // optimize insts rightaway which won't be processed later. + SmallSet Worklist; + + // Insert all instructions in the BB + for (Instruction &I : *BI) + Worklist.insert(&I); + // Optimize every instruction in the basic block. - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ) + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) { + // This instruction has been processed. + Worklist.erase(&*II); if (isInstructionTriviallyDead(&*II)) { EraseInst(&*II++); } else { @@ -2274,27 +2290,22 @@ ++II; } - // Make a copy of all the instructions to be redone so we can remove dead - // instructions. - SetVector> ToRedo(RedoInsts); - // Iterate over all instructions to be reevaluated and remove trivially dead - // instructions. If any operand of the trivially dead instruction becomes - // dead mark it for deletion as well. Continue this process until all - // trivially dead instructions have been removed. - while (!ToRedo.empty()) { - Instruction *I = ToRedo.pop_back_val(); - if (isInstructionTriviallyDead(I)) - RecursivelyEraseDeadInsts(I, ToRedo); - } - - // Now that we have removed dead instructions, we can reoptimize the - // remaining instructions. - while (!RedoInsts.empty()) { - Instruction *I = RedoInsts.pop_back_val(); - if (isInstructionTriviallyDead(I)) - EraseInst(I); - else - OptimizeInst(I); + // If the above optimizations produced new instructions to optimize or + // made modifications which need to be redone, do them now if they won't + // be handled later. + while (!RedoInsts.empty()) { + Instruction *I = RedoInsts.pop_back_val(); + // Process instructions that won't be processed later, either + // inside the block itself or in another basic block (based on rank), + // since these will be processed later. + if ((I->getParent() != BI || !Worklist.count(I)) && + RankMap[I->getParent()] <= RankMap[BI]) { + if (isInstructionTriviallyDead(I)) + EraseInst(I); + else + OptimizeInst(I); + } + } } } Index: test/Transforms/Reassociate/prev_insts_canonicalized.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/prev_insts_canonicalized.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +; These tests make sure that before processing insts +; any previous instructions are already canonicalized. +define i32 @foo(i32 %in) { +; CHECK-LABEL: @foo +; CHECK-NEXT: %factor = mul i32 %in, -4 +; CHECK-NEXT: %factor1 = mul i32 %in, 2 +; CHECK-NEXT: %_3 = add i32 %factor, 1 +; CHECK-NEXT: %_5 = add i32 %_3, %factor1 +; CHECK-NEXT: ret i32 %_5 + %_0 = add i32 %in, 1 + %_1 = mul i32 %in, -2 + %_2 = add i32 %_0, %_1 + %_3 = add i32 %_1, %_2 + %_4 = add i32 %_3, 1 + %_5 = add i32 %in, %_3 + ret i32 %_5 +} + +; CHECK-LABEL: @foo1 +define void @foo1(float %in, i1 %cmp) { +wrapper_entry: + br label %foo1 + +for.body: + %0 = fadd float %in1, %in1 + br label %foo1 + +foo1: + %_0 = fmul fast float %in, -3.000000e+00 + %_1 = fmul fast float %_0, 3.000000e+00 + %in1 = fadd fast float -3.000000e+00, %_1 + %in1use = fadd fast float %in1, %in1 + br label %for.body + + +} + +; CHECK-LABEL: @foo2 +define void @foo2(float %in, i1 %cmp) { +wrapper_entry: + br label %for.body + +for.body: +; If the operands of the phi are sheduled for processing before +; foo1 is processed, the invariant of reassociate are not preserved + %unused = phi float [%in1, %foo1], [undef, %wrapper_entry] + br label %foo1 + +foo1: + %_0 = fmul fast float %in, -3.000000e+00 + %_1 = fmul fast float %_0, 3.000000e+00 + %in1 = fadd fast float -3.000000e+00, %_1 + %in1use = fadd fast float %in1, %in1 + br label %for.body +} Index: test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll =================================================================== --- test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll +++ test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -reassociate -S | FileCheck %s ; CHECK-LABEL: faddsubAssoc1 -; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500 -; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500 -; CHECK: fsub fast half [[TMP2]], [[TMP1]] +; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a +; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]] +; CHECK: fmul fast half [[TMP2]], 0xH4500 ; CHECK: ret ; Input is A op (B op C) define half @faddsubAssoc1(half %a, half %b) { Index: test/Transforms/Reassociate/xor_reassoc.ll =================================================================== --- test/Transforms/Reassociate/xor_reassoc.ll +++ test/Transforms/Reassociate/xor_reassoc.ll @@ -88,8 +88,8 @@ %xor1 = xor i32 %xor, %and ret i32 %xor1 ; CHECK-LABEL: @xor_special2( -; CHECK: %xor = xor i32 %x, 123 -; CHECK: %xor1 = xor i32 %xor, %y +; CHECK: %xor = xor i32 %y, 123 +; CHECK: %xor1 = xor i32 %xor, %x ; CHECK: ret i32 %xor1 }