diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -539,6 +539,12 @@
   bool GepHasConstantOffset = true;
   for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
        I != E; ++I, ++GTI) {
+    Type *GEPIdxedTy = GTI.getIndexedType();
+    if (GEPIdxedTy->isVectorTy() && GEPIdxedTy->getVectorIsScalable()) {
+      GepHasConstantOffset = false;
+      break;
+    }
+
     const Value *Index = *I;
     // Compute the (potentially symbolic) offset in bytes for this index.
     if (StructType *STy = GTI.getStructTypeOrNull()) {
@@ -557,15 +563,16 @@
       if (CIdx->isZero())
         continue;
       Decomposed.OtherOffset +=
-          (DL.getTypeAllocSize(GTI.getIndexedType()) *
-           CIdx->getValue().sextOrSelf(MaxPointerSize))
-              .sextOrTrunc(MaxPointerSize);
+          (DL.getTypeAllocSize(GEPIdxedTy).getFixedSize() *
+           CIdx->getValue().sextOrSelf(MaxPointerSize))
+              .sextOrTrunc(MaxPointerSize);
       continue;
     }
 
     GepHasConstantOffset = false;
 
-    APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
+    APInt Scale(MaxPointerSize,
+                DL.getTypeAllocSize(GEPIdxedTy).getFixedSize());
     unsigned ZExtBits = 0, SExtBits = 0;
 
     // If the integer type is smaller than the pointer size, it is implicitly
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1877,7 +1877,7 @@
       // If the element type has zero size then any index over it is equivalent
       // to an index of zero, so replace it with zero if it is not zero already.
       Type *EltTy = GTI.getIndexedType();
-      if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
+      if (EltTy->isSized() && !DL.getTypeAllocSize(EltTy).isNonZero())
         if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
           *I = Constant::getNullValue(NewIndexType);
           MadeChange = true;
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,17 +20,19 @@
   // If the loaded or stored value is an first class array or struct, don't try
   // to transform them. We need to be able to bitcast to integer.
   if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
-      StoredTy->isArrayTy())
+      StoredTy->isArrayTy() ||
+      (LoadTy->isVectorTy() && LoadTy->getVectorIsScalable()) ||
+      (StoredTy->isVectorTy() && StoredTy->getVectorIsScalable()))
     return false;
 
-  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
 
   // The store size must be byte-aligned to support future type casts.
   if (llvm::alignTo(StoreSize, 8) != StoreSize)
     return false;
 
   // The store has to be at least as big as the load.
-  if (StoreSize < DL.getTypeSizeInBits(LoadTy))
+  if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
     return false;
 
   // Don't coerce non-integral pointers to integers or vice versa.
@@ -59,8 +61,8 @@
   // If this is already the right type, just return it.
   Type *StoredValTy = StoredVal->getType();
 
-  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
-  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
+  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
 
   // If the store and reload are the same size, we can always reuse it.
   if (StoredValSize == LoadedValSize) {
@@ -112,8 +114,8 @@
   // If this is a big-endian system, we need to shift the value down to the low
   // bits so that a truncate will work.
   if (DL.isBigEndian()) {
-    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
-                        DL.getTypeStoreSizeInBits(LoadedTy);
+    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() -
+                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize();
     StoredVal = Helper.CreateLShr(
         StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
   }
@@ -162,7 +164,8 @@
                                           const DataLayout &DL) {
   // If the loaded or stored value is a first class array or struct, don't try
   // to transform them. We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+      (LoadTy->isVectorTy() && LoadTy->getVectorIsScalable()))
     return -1;
 
   int64_t StoreOffset = 0, LoadOffset = 0;
@@ -180,7 +183,7 @@
   // If the load and store don't overlap at all, the store doesn't provide
   // anything to the load. In this case, they really don't alias at all, AA
   // must have gotten confused.
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
 
   if ((WriteSizeInBits & 7) | (LoadSize & 7))
     return -1;
@@ -216,8 +219,9 @@
   auto *StoredVal = DepSI->getValueOperand();
 
   // Cannot handle reading from store of first-class aggregate yet.
-  if (StoredVal->getType()->isStructTy() ||
-      StoredVal->getType()->isArrayTy())
+  if (StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy() ||
+      (StoredVal->getType()->isVectorTy() &&
+       StoredVal->getType()->getVectorIsScalable()))
     return -1;
 
   // Don't coerce non-integral pointers to integers or vice versa.
@@ -231,7 +235,7 @@
 
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
-      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
   return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                         DL);
 }
@@ -336,7 +340,7 @@
     return -1;
 
   Value *DepPtr = DepLI->getPointerOperand();
-  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
   int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
   if (R != -1)
     return R;
@@ -346,7 +350,7 @@
   int64_t LoadOffs = 0;
   const Value *LoadBase =
       GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
 
   unsigned Size =
       getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
@@ -436,8 +440,9 @@
     return SrcVal;
   }
 
-  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
-  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+  uint64_t StoreSize =
+      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8;
+  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8;
   // Compute which bits of the stored value are being used by the load. Convert
   // to an integer type to start with.
   if (SrcVal->getType()->isPtrOrPtrVectorTy())
@@ -489,8 +494,9 @@
                            Instruction *InsertPt, const DataLayout &DL) {
   // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
   // widen SrcVal out to a larger load.
-  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned SrcValStoreSize =
+      DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
   if (Offset + LoadSize > SrcValStoreSize) {
     assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
     assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
@@ -533,8 +539,9 @@
 
 Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
                                       Type *LoadTy, const DataLayout &DL) {
-  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned SrcValStoreSize =
+      DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
   if (Offset + LoadSize > SrcValStoreSize)
     return nullptr;
   return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
@@ -545,7 +552,7 @@
                                 Type *LoadTy, HelperClass &Helper,
                                 const DataLayout &DL) {
   LLVMContext &Ctx = LoadTy->getContext();
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
 
   // We know that this method is only called when the mem transfer fully
   // provides the bits for the load.
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -0,0 +1,337 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -basicaa -gvn -dce | FileCheck %s
+
+; Analyze Load from clobbering Load.
+
+define <vscale x 4 x i32> @load_store_clobber_load(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @load_store_clobber_load(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @load_store_clobber_load_mayalias(<vscale x 4 x i32>* %p, <vscale x 4 x i32>* %p2) {
+; CHECK-LABEL: @load_store_clobber_load_mayalias(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %sub = sub <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @load_store_clobber_load_noalias(<vscale x 4 x i32>* noalias %p, <vscale x 4 x i32>* noalias %p2) {
+; CHECK-LABEL: @load_store_clobber_load_noalias(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+; TODO: %load2 could be eliminated
+define i32 @load_clobber_load_gep1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep1(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 1
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 1
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
+  %gep2 = getelementptr i32, i32* %p2, i64 1
+  %load2 = load i32, i32* %gep2 ; <- load could be eliminated
+  %add = add i32 %load1, %load2
+  ret i32 %add
+}
+
+define i32 @load_clobber_load_gep2(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep2(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 4
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
+  %gep2 = getelementptr i32, i32* %p2, i64 4
+  %load2 = load i32, i32* %gep2 ; <- can not determine at compile-time if %load1 and %load2 are same addr
+  %add = add i32 %load1, %load2
+  ret i32 %add
+}
+
+define i32 @load_clobber_load_gep3(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep3(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
+  %gep2 = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %p2, i64 1, i64 0
+  %load2 = load float, float* %gep2 ; <- load to be eliminated
+  %cast = bitcast float %load2 to i32
+  %add = add i32 %load1, %cast
+  ret i32 %add
+}
+
+define <vscale x 4 x i32> @load_clobber_load_fence(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_fence(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm "", "~{memory}"()
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  call void asm "", "~{memory}"()
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %sub = sub <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @load_clobber_load_sideeffect(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_sideeffect(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm sideeffect "", ""()
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  call void asm sideeffect "", ""()
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+; Analyze Load from clobbering Store.
+
+define <vscale x 4 x i32> @store_forward_to_load(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @store_forward_to_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x i32> @store_forward_to_load_sideeffect(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @store_forward_to_load_sideeffect(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm sideeffect "", ""()
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  call void asm sideeffect "", ""()
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
+define i32 @store_clobber_load() {
+; CHECK-LABEL: @store_clobber_load(
+; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x i32>
+; CHECK-NEXT:    store <vscale x 4 x i32> undef, <vscale x 4 x i32>* [[ALLOC]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[ALLOC]], i32 0, i32 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[PTR]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %alloc = alloca <vscale x 4 x i32>
+  store <vscale x 4 x i32> undef, <vscale x 4 x i32>* %alloc
+  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %alloc, i32 0, i32 1
+  %load = load i32, i32* %ptr
+  ret i32 %load
+}
+
+; Analyze Load from clobbering MemInst.
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+
+define i32 @memset_clobber_load(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @memset_clobber_load(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    ret i32 16843009
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 5
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+define i32 @memset_clobber_load_vscaled_base(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @memset_clobber_load_vscaled_base(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+define i32 @memset_clobber_load_nonconst_index(<vscale x 4 x i32> *%p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @memset_clobber_load_nonconst_index(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 %idx1, i64 %idx2
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+
+; Load elimination across BBs
+
+define <vscale x 4 x i32>* @load_from_alloc_replaced_with_undef() {
+; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>
+; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[A]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret <vscale x 4 x i32>* [[A]]
+;
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i64 0, i64 1
+  %load = load i32, i32* %gep ; <- load to be eliminated
+  %tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %a
+  br label %if.end
+
+if.end:
+  ret <vscale x 4 x i32>* %a
+}
+
+define i32 @redundant_load_elimination_1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @redundant_load_elimination_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i32 [[LOAD1]]
+;
+entry:
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  %load1 = load i32, i32* %gep
+  %cmp = icmp eq i32 %load1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %load2 = load i32, i32* %gep ; <- load to be eliminated
+  %add = add i32 %load1, %load2
+  br label %if.end
+
+if.end:
+  %result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
+  ret i32 %result
+}
+
+define void @redundant_load_elimination_2(i1 %c, <vscale x 4 x i32>* %p, i32* %q, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @redundant_load_elimination_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    store i32 0, i32* [[GEP1]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 0
+; CHECK-NEXT:    store i32 1, i32* [[GEP2]]
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i32 0, i32* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+; CHECK:       if.else:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  store i32 0, i32* %gep1
+  %gep2 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  store i32 1, i32* %gep2
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32, i32* %gep1 ; <- load to be eliminated
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  ret void
+}
+
+; TODO: load in if.then could have been eliminated
+define void @missing_load_elimination(i1 %c, <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %q, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @missing_load_elimination(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1
+; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], <vscale x 4 x i32>* [[P1]]
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], <vscale x 4 x i32>* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+; CHECK:       if.else:
+; CHECK-NEXT:    ret void
+;
+entry:
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %p1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
+  store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; load could be eliminated
+  store <vscale x 4 x i32> %t, <vscale x 4 x i32>* %q
+  ret void
+
+if.else:
+  ret void
+}