diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5810,6 +5810,77 @@
   return ::SimplifyFreezeInst(Op0, Q);
 }
 
+static Constant *ConstructLoadOperandConstant(Value *Op) {
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(Op);
+  while (true) {
+    Value *CurOp = Worklist.back();
+    if (isa<Constant>(CurOp))
+      break;
+    if (auto *BC = dyn_cast<BitCastOperator>(CurOp)) {
+      Worklist.push_back(BC->getOperand(0));
+    } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
+      for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
+        if (!isa<Constant>(GEP->getOperand(I)))
+          return nullptr;
+      }
+      Worklist.push_back(GEP->getOperand(0));
+    } else if (auto *II = dyn_cast<IntrinsicInst>(CurOp)) {
+      if (II->getIntrinsicID() != Intrinsic::strip_invariant_group &&
+          II->getIntrinsicID() != Intrinsic::launder_invariant_group)
+        return nullptr;
+      Worklist.push_back(II->getOperand(0));
+    } else {
+      return nullptr;
+    }
+  }
+
+  Constant *NewOp = cast<Constant>(Worklist.pop_back_val());
+  while (!Worklist.empty()) {
+    Value *CurOp = Worklist.pop_back_val();
+    if (isa<BitCastOperator>(CurOp)) {
+      NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
+    } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
+      SmallVector<Constant *, 4> Idxs;
+      Idxs.reserve(GEP->getNumOperands() - 1);
+      for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+        Idxs.push_back(cast<Constant>(GEP->getOperand(I)));
+      }
+      NewOp = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), NewOp,
+                                             Idxs, GEP->isInBounds(),
+                                             GEP->getInRangeIndex());
+    } else {
+      assert(isa<IntrinsicInst>(CurOp) && "expected intrinsic");
+      assert((cast<IntrinsicInst>(CurOp)->getIntrinsicID() ==
+                  Intrinsic::launder_invariant_group ||
+              cast<IntrinsicInst>(CurOp)->getIntrinsicID() ==
+                  Intrinsic::strip_invariant_group) &&
+             "expected invariant.group intrinsic");
+      NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
+    }
+  }
+  return NewOp;
+}
+
+static Value *SimplifyLoadInst(LoadInst *LI, const SimplifyQuery &Q) {
+  if (LI->isVolatile())
+    return nullptr;
+
+  if (auto *C = ConstantFoldInstruction(LI, Q.DL))
+    return C;
+
+  // The following only catches more cases than ConstantFoldInstruction() if
+  // the load operand isn't a constant, e.g. via invariant.group intrinsics.
+  if (isa<Constant>(LI->getPointerOperand()))
+    return nullptr;
+
+  if (auto *C = dyn_cast_or_null<Constant>(
+          ConstructLoadOperandConstant(LI->getPointerOperand())))
+    return ConstantFoldLoadFromConstPtr(C, LI->getType(), Q.DL);
+
+  return nullptr;
+}
+
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.
@@ -5966,6 +6037,9 @@
     // No simplifications for Alloca and it can't be constant folded.
     Result = nullptr;
     break;
+  case Instruction::Load:
+    Result = SimplifyLoadInst(cast<LoadInst>(I), Q);
+    break;
   }
 
   /// If called on unreachable code, the above logic may report that the
diff --git a/llvm/test/Transforms/InstSimplify/invariant.group-load.ll b/llvm/test/Transforms/InstSimplify/invariant.group-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/invariant.group-load.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instsimplify -S < %s | FileCheck %s
+
+@A = linkonce_odr hidden constant { i64, i64 } { i64 2, i64 3 }
+@B = linkonce_odr hidden global { i64, i64 } { i64 2, i64 3 }
+
+declare i8* @llvm.strip.invariant.group.p0i8(i8* %p)
+declare i8* @llvm.launder.invariant.group.p0i8(i8* %p)
+
+define i64 @f() {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    ret i64 3
+;
+  %p = bitcast { i64, i64 }* @A to i8*
+  %a = call i8* @llvm.strip.invariant.group.p0i8(i8* %p)
+  %b = getelementptr i8, i8* %a, i32 8
+  %c = bitcast i8* %b to i64*
+  %d = load i64, i64* %c
+  ret i64 %d
+}
+
+define i64 @g() {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:    ret i64 3
+;
+  %p = bitcast { i64, i64 }* @A to i8*
+  %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
+  %b = getelementptr i8, i8* %a, i32 8
+  %c = bitcast i8* %b to i64*
+  %d = load i64, i64* %c
+  ret i64 %d
+}
+
+define i64 @notconstantglobal() {
+; CHECK-LABEL: @notconstantglobal(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* bitcast ({ i64, i64 }* @B to i8*))
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i8, i8* [[A]], i32 8
+; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[B]] to i64*
+; CHECK-NEXT:    [[D:%.*]] = load i64, i64* [[C]], align 4
+; CHECK-NEXT:    ret i64 [[D]]
+;
+  %p = bitcast { i64, i64 }* @B to i8*
+  %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
+  %b = getelementptr i8, i8* %a, i32 8
+  %c = bitcast i8* %b to i64*
+  %d = load i64, i64* %c
+  ret i64 %d
+}
+
+define i64 @notconstantgepindex(i32 %i) {
+; CHECK-LABEL: @notconstantgepindex(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* bitcast ({ i64, i64 }* @A to i8*))
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i8, i8* [[A]], i32 [[I:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[B]] to i64*
+; CHECK-NEXT:    [[D:%.*]] = load i64, i64* [[C]], align 4
+; CHECK-NEXT:    ret i64 [[D]]
+;
+  %p = bitcast { i64, i64 }* @A to i8*
+  %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
+  %b = getelementptr i8, i8* %a, i32 %i
+  %c = bitcast i8* %b to i64*
+  %d = load i64, i64* %c
+  ret i64 %d
+}
+
+define i64 @volatile() {
+; CHECK-LABEL: @volatile(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* bitcast ({ i64, i64 }* @A to i8*))
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i8, i8* [[A]], i32 8
+; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[B]] to i64*
+; CHECK-NEXT:    [[D:%.*]] = load volatile i64, i64* [[C]], align 4
+; CHECK-NEXT:    ret i64 [[D]]
+;
+  %p = bitcast { i64, i64 }* @A to i8*
+  %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
+  %b = getelementptr i8, i8* %a, i32 8
+  %c = bitcast i8* %b to i64*
+  %d = load volatile i64, i64* %c
+  ret i64 %d
+}
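
A minimal standalone sketch (not part of the patch) of how the new fold is expected to surface through the public SimplifyInstruction() entry point: it parses IR with the same shape as @f in the test above and queries the load directly. The driver itself and its structure are illustrative assumptions; parseAssemblyString(), SimplifyQuery, and SimplifyInstruction() are the existing LLVM C++ APIs of this vintage (before the simplifyInstruction renaming).

// invariant-group-demo.cpp: feed a load through launder.invariant.group +
// gep + bitcast of a constant global into InstSimplify and print the result.
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  // Same shape as @f in the test: the load's pointer goes through a
  // launder.invariant.group call before reaching the constant global.
  std::unique_ptr<Module> M = parseAssemblyString(R"IR(
    @A = constant { i64, i64 } { i64 2, i64 3 }
    declare i8* @llvm.launder.invariant.group.p0i8(i8*)
    define i64 @f() {
      %p = bitcast { i64, i64 }* @A to i8*
      %a = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
      %b = getelementptr i8, i8* %a, i32 8
      %c = bitcast i8* %b to i64*
      %d = load i64, i64* %c
      ret i64 %d
    }
  )IR",
                                                  Err, Ctx);
  if (!M) {
    Err.print("invariant-group-demo", errs());
    return 1;
  }

  Function &F = *M->getFunction("f");
  SimplifyQuery Q(M->getDataLayout());
  for (Instruction &I : F.getEntryBlock()) {
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      // With the patch applied, this should fold to the constant i64 3;
      // without it, SimplifyInstruction() returns null for this load.
      if (Value *V = SimplifyInstruction(LI, Q)) {
        V->print(outs());
        outs() << "\n";
      }
    }
  }
  return 0;
}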