Index: llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp @@ -186,7 +186,8 @@ public: GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md, bool OptForMinSize) - : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {} + : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), + HoistingGeps(OptForMinSize), HoistedCtr(0) {} bool run(Function &F) { VN.setDomTree(DT); VN.setAliasAnalysis(AA); @@ -230,6 +231,7 @@ AliasAnalysis *AA; MemoryDependenceResults *MD; const bool OptForMinSize; + const bool HoistingGeps; DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; MemorySSA *MSSA; @@ -609,21 +611,86 @@ return true; } - bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const { + // Same as allOperandsAvailable with recursive check for GEP operands. + bool allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) { + if (const GetElementPtrInst *GepOp = dyn_cast(Inst)) { + if (!allGepOperandsAvailable(GepOp, HoistPt)) + return false; + // Gep is available if all operands of GepOp are available. + } else { + // Gep is not available if it has operands other than GEPs that are + // defined in blocks not dominating HoistPt. + return false; + } + } + return true; + } + + // Make all operands of the GEP available. + void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const { + assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); + + Instruction *ClonedGep = Gep->clone(); + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { + + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively make all the operands + // of this GEP available at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast(Op)) + makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); + } + + // Copy Gep and replace its uses in Repl with ClonedGep. + ClonedGep->insertBefore(HoistPt->getTerminator()); + + // Conservatively discard any optimization hints, they may differ on the + // other paths. + ClonedGep->dropUnknownNonDebugMetadata(); + + // If we have optimization hints which agree with each other along different + // paths, preserve them. + for (const Instruction *OtherInst : InstructionsToHoist) { + const GetElementPtrInst *OtherGep; + if (auto *OtherLd = dyn_cast(OtherInst)) + OtherGep = cast(OtherLd->getPointerOperand()); + else + OtherGep = cast( + cast(OtherInst)->getPointerOperand()); + ClonedGep->intersectOptionalDataWith(OtherGep); + } + + // Replace uses of Gep with ClonedGep in Repl. + Repl->replaceUsesOfWith(Gep, ClonedGep); + } + + // In the case Repl is a load or a store, we make all their GEPs + // available: GEPs are not hoisted by default to avoid the address + // computations to be hoisted without the associated load or store. + bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const { // Check whether the GEP of a ld/st can be synthesized at HoistPt. GetElementPtrInst *Gep = nullptr; Instruction *Val = nullptr; - if (auto *Ld = dyn_cast(Repl)) + if (auto *Ld = dyn_cast(Repl)) { Gep = dyn_cast(Ld->getPointerOperand()); - if (auto *St = dyn_cast(Repl)) { + } else if (auto *St = dyn_cast(Repl)) { Gep = dyn_cast(St->getPointerOperand()); Val = dyn_cast(St->getValueOperand()); // Check that the stored value is available. if (Val) { if (isa(Val)) { // Check whether we can compute the GEP at HoistPt. - if (!allOperandsAvailable(Val, HoistPt)) + if (!allGepOperandsAvailable(Val, HoistPt)) return false; } else if (!DT->dominates(Val->getParent(), HoistPt)) return false; @@ -631,40 +698,13 @@ } // Check whether we can compute the Gep at HoistPt. - if (!Gep || !allOperandsAvailable(Gep, HoistPt)) + if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) return false; - // Copy the gep before moving the ld/st. - Instruction *ClonedGep = Gep->clone(); - ClonedGep->insertBefore(HoistPt->getTerminator()); - // Conservatively discard any optimization hints, they may differ on the - // other paths. - for (Instruction *OtherInst : InstructionsToHoist) { - GetElementPtrInst *OtherGep; - if (auto *OtherLd = dyn_cast(OtherInst)) - OtherGep = cast(OtherLd->getPointerOperand()); - else - OtherGep = cast( - cast(OtherInst)->getPointerOperand()); - ClonedGep->intersectOptionalDataWith(OtherGep); - combineKnownMetadata(ClonedGep, OtherGep); - } - Repl->replaceUsesOfWith(Gep, ClonedGep); + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); - // Also copy Val when it is a GEP. - if (Val && isa(Val)) { - Instruction *ClonedVal = Val->clone(); - ClonedVal->insertBefore(HoistPt->getTerminator()); - // Conservatively discard any optimization hints, they may differ on the - // other paths. - for (Instruction *OtherInst : InstructionsToHoist) { - auto *OtherVal = - cast(cast(OtherInst)->getValueOperand()); - ClonedVal->intersectOptionalDataWith(OtherVal); - combineKnownMetadata(ClonedVal, OtherVal); - } - Repl->replaceUsesOfWith(Val, ClonedVal); - } + if (Val && isa(Val)) + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); return true; } @@ -695,14 +735,14 @@ Repl = InstructionsToHoist.front(); // We can move Repl in HoistPt only when all operands are available. + // When not HoistingGeps we need to copy the GEPs now. // The order in which hoistings are done may influence the availability // of operands. - if (!allOperandsAvailable(Repl, HoistPt) && - !makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) + if (!allOperandsAvailable(Repl, HoistPt) && !HoistingGeps && + !makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) continue; + Repl->moveBefore(HoistPt->getTerminator()); - // TBAA may differ on one of the other paths, we need to get rid of - // anything which might conflict. } if (isa(Repl)) @@ -783,7 +823,7 @@ break; } CI.insert(Call, VN); - } else if (OptForMinSize || !isa(&I1)) + } else if (HoistingGeps || !isa(&I1)) // Do not hoist scalars past calls that may write to memory because // that could result in spills later. geps are handled separately. // TODO: We can relax this for targets like AArch64 as they have more Index: llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll =================================================================== --- llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll +++ llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll @@ -0,0 +1,103 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s + +; Check that recursive GEPs are hoisted. +; CHECK-LABEL: @fun +; CHECK: fdiv +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: fsub +; CHECK-NOT: fmul + +%0 = type { double, double, double } +%1 = type { double, double, double } +%2 = type { %3, %1, %1 } +%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 } +%4 = type { %5 } +%5 = type { %6 } +%6 = type { %7 } +%7 = type { %8 } +%8 = type { %9 } +%9 = type { i64, i64, i8* } +%10 = type <{ i32 (...)**, i32, [4 x i8] }> +%11 = type { [4 x [4 x double]] } +%12 = type <{ %1, %0, i32, [4 x i8] }> +%13 = type { %1, %0, %12, %3*, %14* } +%14 = type opaque + +@d = external global %0, align 8 +@p = external global %1, align 8 + +define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) { + %5 = alloca %2*, align 8 + %6 = alloca %12*, align 8 + %7 = alloca double*, align 8 + %8 = alloca %13*, align 8 + %9 = alloca double, align 8 + %10 = alloca double, align 8 + %11 = alloca double, align 8 + %12 = alloca double, align 8 + %13 = alloca double, align 8 + %14 = alloca double, align 8 + %15 = alloca double, align 8 + store %2* %0, %2** %5, align 8 + store %12* %1, %12** %6, align 8 + store double* %2, double** %7, align 8 + store %13* %3, %13** %8, align 8 + %16 = load %2*, %2** %5, align 8 + %17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8 + %18 = fdiv double 1.000000e+00, %17 + store double %18, double* %15, align 8 + %19 = load double, double* %15, align 8 + %20 = fcmp oge double %19, 0.000000e+00 + br i1 %20, label %21, label %36 + +;