Index: llvm/lib/Analysis/Loads.cpp =================================================================== --- llvm/lib/Analysis/Loads.cpp +++ llvm/lib/Analysis/Loads.cpp @@ -664,6 +664,37 @@ return nullptr; } +static bool mayOverlapLoadedMemory(const StoreInst *Store, const LoadInst *Load, + const DataLayout &DL) { + if (Store->getValueOperand() != Load) { + return true; + } + auto *Ty = Load->getType(); + if (Ty->isScalableTy()) { + return true; + } + auto AlignValue = Load->getAlign().value(); + if (Store->getAlign().value() != AlignValue) { + return true; + } + auto TyStoreSize = DL.getTypeStoreSize(Ty).getFixedValue(); + if (TyStoreSize > AlignValue) { + return true; + } + return false; +} + +static bool writesUnchangedLoadedValueToSameLocation(const Instruction *Inst, + const Value *Available, + const DataLayout &DL) { + auto *Store = dyn_cast_or_null<StoreInst>(Inst); + auto *Load = dyn_cast_or_null<LoadInst>(Available); + if (!Store || !Load || mayOverlapLoadedMemory(Store, Load, DL)) { + return false; + } + return true; +} + Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, unsigned MaxInstsToScan) { @@ -701,9 +732,13 @@ // did not modify the memory location. 
if (Available) { MemoryLocation Loc = MemoryLocation::get(Load); - for (Instruction *Inst : MustNotAliasInsts) + for (Instruction *Inst : MustNotAliasInsts) { + if (writesUnchangedLoadedValueToSameLocation(Inst, Available, DL)) { + continue; + } if (isModSet(AA.getModRefInfo(Inst, Loc))) return nullptr; + } } return Available; Index: llvm/test/Transforms/InstCombine/load-elimination.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/load-elimination.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define void @load_elimination_i8(ptr noundef %a, ptr noundef %b, ptr noundef %c) { +; CHECK-LABEL: @load_elimination_i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP0]], ptr [[A:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP0]], ptr [[B:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + %0 = load i8, ptr %c, align 1 + store i8 %0, ptr %a, align 1 + %1 = load i8, ptr %c, align 1 + store i8 %1, ptr %b, align 1 + ret void +} + +define void @load_elimination_i32(ptr noundef %a, ptr noundef %b, ptr noundef %c) { +; CHECK-LABEL: @load_elimination_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[C:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[B:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, ptr %c, align 4 + store i32 %0, ptr %a, align 4 + %1 = load i32, ptr %c, align 4 + store i32 %1, ptr %b, align 4 + ret void +} + +define void @load_elimination_i32_i8(ptr noundef %a, ptr noundef %b, ptr noundef %c) { +; CHECK-LABEL: @load_elimination_i32_i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: store i32 [[CONV]], ptr [[A:%.*]], 
align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C]], align 1 +; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: store i32 [[CONV1]], ptr [[B:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = load i8, ptr %c, align 1 + %conv = zext i8 %0 to i32 + store i32 %conv, ptr %a, align 4 + %1 = load i8, ptr %c, align 1 + %conv1 = zext i8 %1 to i32 + store i32 %conv1, ptr %b, align 4 + ret void +} + +define void @load_elimination_float(ptr %x, ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: @load_elimination_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X1:%.*]] = load float, ptr [[X:%.*]], align 4 +; CHECK-NEXT: store float [[X1]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: store float [[X1]], ptr [[C:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %x1 = load float, ptr %x + store float %x1, ptr %a + %x2 = load float, ptr %x + store float %x2, ptr %c + ret void +} Index: llvm/test/Transforms/InstCombine/load.ll =================================================================== --- llvm/test/Transforms/InstCombine/load.ll +++ llvm/test/Transforms/InstCombine/load.ll @@ -213,9 +213,7 @@ ; CHECK-NEXT: [[X1:%.*]] = load float, ptr [[X:%.*]], align 4 ; CHECK-NEXT: store float [[X1]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: store float [[X1]], ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[X2:%.*]] = load float, ptr [[X]], align 4 -; CHECK-NEXT: store float [[X2]], ptr [[B]], align 4 -; CHECK-NEXT: store float [[X2]], ptr [[C:%.*]], align 4 +; CHECK-NEXT: store float [[X1]], ptr [[C:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -238,9 +236,7 @@ ; CHECK-NEXT: [[X1:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: store <4 x i8> [[X1]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: store <4 x i8> [[X1]], ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[X2:%.*]] = load <4 x i8>, ptr [[X]], align 4 -; CHECK-NEXT: store <4 x i8> [[X2]], ptr [[B]], align 4 -; CHECK-NEXT: store <4 x i8> [[X2]], ptr [[C:%.*]], align 4 +; CHECK-NEXT: store <4 x i8> [[X1]], ptr [[C:%.*]], align 4 
; CHECK-NEXT: ret void ; entry: