Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -574,41 +574,60 @@ llvm_unreachable("Both I and J must be from same BB"); } + // Make all operands of the GEP available. + bool makeGepOperandsAvailable(GetElementPtrInst *Gep, BasicBlock *HoistPt, + Instruction *Repl) const { + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { + + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively check whether all the + // operands of this GEP can be computed at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast(Op)) + if (!makeGepOperandsAvailable(GepOp, HoistPt, Gep)) + return false; + + // Failed to make Op available. + return false; + } + + // Copy Gep and replace its uses in Repl with ClonedGep. + Instruction *ClonedGep = Gep->clone(); + ClonedGep->insertBefore(HoistPt->getTerminator()); + Repl->replaceUsesOfWith(Gep, ClonedGep); + + return true; + } + bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const { // Check whether the GEP of a ld/st can be synthesized at HoistPt. - Instruction *Gep = nullptr; - Instruction *Val = nullptr; + GetElementPtrInst *Gep = nullptr; + Instruction *StoredValue = nullptr; if (auto *Ld = dyn_cast(Repl)) - Gep = dyn_cast(Ld->getPointerOperand()); + Gep = dyn_cast(Ld->getPointerOperand()); if (auto *St = dyn_cast(Repl)) { - Gep = dyn_cast(St->getPointerOperand()); - Val = dyn_cast(St->getValueOperand()); + Gep = dyn_cast(St->getPointerOperand()); + StoredValue = dyn_cast(St->getValueOperand()); } - if (!Gep || !isa(Gep)) - return false; - // Check whether we can compute the Gep at HoistPt. - if (!allOperandsAvailable(Gep, HoistPt)) + if (!Gep || !makeGepOperandsAvailable(Gep, HoistPt, Repl)) return false; - // Also check that the stored value is available. - if (Val && !allOperandsAvailable(Val, HoistPt)) - return false; - - // Copy the gep before moving the ld/st. - Instruction *ClonedGep = Gep->clone(); - ClonedGep->insertBefore(HoistPt->getTerminator()); - Repl->replaceUsesOfWith(Gep, ClonedGep); + // For loads there is nothing else to be done. + if (!StoredValue) + return true; - // Also copy Val. - if (Val) { - Instruction *ClonedVal = Val->clone(); - ClonedVal->insertBefore(HoistPt->getTerminator()); - Repl->replaceUsesOfWith(Val, ClonedVal); - } + // Copy StoredValue when it is a GEP: GEPs are not hoisted by default. + if (GetElementPtrInst *StGep = dyn_cast(StoredValue)) + if (makeGepOperandsAvailable(StGep, HoistPt, Repl)) + return true; - return true; + // Check that StoredValue is available. + return allOperandsAvailable(StoredValue, HoistPt); } std::pair hoist(HoistingPointList &HPL) { Index: llvm/test/Transforms/GVN/hoist-recursive-geps.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVN/hoist-recursive-geps.ll @@ -0,0 +1,103 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s + +; Check that recursive GEPs are hoisted. +; CHECK-LABEL: @fun +; CHECK: fdiv +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: fsub +; CHECK-NOT: fmul + +%0 = type { double, double, double } +%1 = type { double, double, double } +%2 = type { %3, %1, %1 } +%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 } +%4 = type { %5 } +%5 = type { %6 } +%6 = type { %7 } +%7 = type { %8 } +%8 = type { %9 } +%9 = type { i64, i64, i8* } +%10 = type <{ i32 (...)**, i32, [4 x i8] }> +%11 = type { [4 x [4 x double]] } +%12 = type <{ %1, %0, i32, [4 x i8] }> +%13 = type { %1, %0, %12, %3*, %14* } +%14 = type opaque + +@d = external global %0, align 8 +@p = external global %1, align 8 + +define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) { + %5 = alloca %2*, align 8 + %6 = alloca %12*, align 8 + %7 = alloca double*, align 8 + %8 = alloca %13*, align 8 + %9 = alloca double, align 8 + %10 = alloca double, align 8 + %11 = alloca double, align 8 + %12 = alloca double, align 8 + %13 = alloca double, align 8 + %14 = alloca double, align 8 + %15 = alloca double, align 8 + store %2* %0, %2** %5, align 8 + store %12* %1, %12** %6, align 8 + store double* %2, double** %7, align 8 + store %13* %3, %13** %8, align 8 + %16 = load %2*, %2** %5, align 8 + %17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8 + %18 = fdiv double 1.000000e+00, %17 + store double %18, double* %15, align 8 + %19 = load double, double* %15, align 8 + %20 = fcmp oge double %19, 0.000000e+00 + br i1 %20, label %21, label %36 + +;