// Review notes. This is free text placed ahead of the first "diff --git" header; the patch payload below is unchanged.
//
// Purpose: the patch edits InstCombineLoadStoreAlloca.cpp in four hunks and adds a new FileCheck test, replace-alloca-merge.ll.
//   Hunk 1 (-18,6 +18,7): adds an include of llvm/IR/Instruction.h.
//   Hunk 2 (-60,7 +61,8): adds a third isa(...) alternative to the condition that queues I on ValuesToInspect via emplace_back(I, IsOffset).
//   Hunk 3 (-256,6 +258,12): adds a branch for SI that inserts SI->getTrueValue(), SI->getFalseValue() and Inst into Worklist, and returns false when collectUsers(*Inst) fails.
//   Hunk 4 (-313,6 +321,13): adds a branch that builds NewSI with SelectInst::Create from SI->getCondition() and getReplacement() of both operands, inserts it with IC.InsertNewInstWith(NewSI, *SI), takes over SI's name and records WorkMap[SI] = NewSI.
//   New test: @remove_alloca_use_arg expects no alloca line, two getelementptr on %arg in addrspace(4), two loads and a select of the loaded i8 values.
//             @volatile_load_keep_alloca expects the alloca, the memcpy, both getelementptr, the pointer select and the volatile load to remain.
//
// NOTE(review): the template arguments of isa/dyn_cast/cast are absent in this copy (e.g. "isa(I)", "dyn_cast(Inst)"); presumably lost in extraction - confirm against the original patch.
// NOTE(review): hunk 3 applies cast(...) unconditionally to both select operands. If this is a cast to Instruction, an operand that is an argument, constant or global would trip the cast assertion - confirm whether a dyn_cast guard that returns false is needed.
// NOTE(review): hunk 2 leaves the comment "If uses of the bitcast are ok, we are ok." unchanged although the condition now has a third alternative - consider rewording it in the source file.
// NOTE(review): both test functions memcpy i64 256 bytes into an alloca of [32 x i8] from an argument marked dereferenceable(32) - confirm the length is intended.
// NOTE(review): the memcpy is declared as @llvm.memcpy.p5i8.p4i8.i64 but takes [32 x i8] pointer parameters, while the CHECK line expects @llvm.memcpy.p5a32i8.p4a32i8.i64 - confirm the declaration name.
// NOTE(review): in the first test a CHECK-NOT that defines [[ALLOCA]] sits between CHECK-NEXT lines, so it only covers the gap between those adjacent matches - confirm this is the intended negative check.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" @@ -60,7 +61,8 @@ continue; } - if (isa(I) || isa(I)) { + if (isa(I) || isa(I) || + isa(I)) { // If uses of the bitcast are ok, we are ok. ValuesToInspect.emplace_back(I, IsOffset); continue; @@ -256,6 +258,12 @@ if (Load->isVolatile()) return false; Worklist.insert(Load); + } else if (auto *SI = dyn_cast(Inst)) { + Worklist.insert(cast(SI->getTrueValue())); + Worklist.insert(cast(SI->getFalseValue())); + Worklist.insert(Inst); + if (!collectUsers(*Inst)) + return false; } else if (isa(Inst) || isa(Inst)) { Worklist.insert(Inst); if (!collectUsers(*Inst)) @@ -313,6 +321,13 @@ IC.InsertNewInstWith(NewI, *BC); NewI->takeName(BC); WorkMap[BC] = NewI; + } else if (auto *SI = dyn_cast(I)) { + auto *NewSI = SelectInst::Create(SI->getCondition(), + getReplacement(SI->getTrueValue()), + getReplacement(SI->getFalseValue())); + IC.InsertNewInstWith(NewSI, *SI); + NewSI->takeName(SI); + WorkMap[SI] = NewSI; } else if (auto *MemCpy = dyn_cast(I)) { auto *SrcV = getReplacement(MemCpy->getRawSource()); // The pointer may appear in the destination of a copy, but we don't want to diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-merge.ll b/llvm/test/Transforms/InstCombine/replace-alloca-merge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/replace-alloca-merge.ll @@ -0,0 +1,45 @@ +; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s + +target triple="amdgcn-amd-amdhsa" + +define i8 @remove_alloca_use_arg([32 x 
i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) { +; CHECK-LABEL: @remove_alloca_use_arg +; CHECK-NEXT: entry: +; CHECK-NOT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5) +; CHECK-NEXT: [[VAL1_PTR:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* %arg, i64 0, i64 1 +; CHECK-NEXT: [[VAL2_PTR:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* %arg, i64 0, i64 0 +; CHECK-NEXT: [[VAL1:%.*]] = load i8, i8 addrspace(4)* [[VAL1_PTR]], align 1 +; CHECK-NEXT: [[VAL2:%.*]] = load i8, i8 addrspace(4)* [[VAL2_PTR]], align 4 +; CHECK-NEXT: [[LOAD_VAL:%.*]] = select i1 %cond, i8 [[VAL1]], i8 [[VAL2]] +; CHECK-NEXT: ret i8 [[LOAD_VAL]] +entry: + %alloca = alloca [32 x i8], align 4, addrspace(5) + call void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)* %alloca, [32 x i8] addrspace(4)* %arg, i64 256, i1 false) + %val1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 1 + %val2 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 0 + %ptr = select i1 %cond, i8 addrspace(5)* %val1, i8 addrspace(5)* %val2 + %load = load i8, i8 addrspace(5)* %ptr + ret i8 %load +} + +define i8 @volatile_load_keep_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) { +; CHECK-LABEL: @volatile_load_keep_alloca +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5a32i8.p4a32i8.i64([32 x i8] addrspace(5)* noundef nonnull align 4 dereferenceable(256) [[ALLOCA]], [32 x i8] addrspace(4)* noundef align 4 dereferenceable(256) %arg, i64 256, i1 false) +; CHECK-NEXT: [[VAL1_PTR:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: [[VAL2_PTR:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 0 +; CHECK-NEXT: [[PTR:%.*]] = select i1 %cond, i8 addrspace(5)* [[VAL1_PTR]], i8 addrspace(5)* 
[[VAL2_PTR]] +; CHECK-NEXT: [[LOAD_VAL:%.*]] = load volatile i8, i8 addrspace(5)* [[PTR]] +; CHECK-NEXT: ret i8 [[LOAD_VAL]] +entry: + %alloca = alloca [32 x i8], align 4, addrspace(5) + call void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)* %alloca, [32 x i8] addrspace(4)* %arg, i64 256, i1 false) + %val1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 1 + %val2 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 0 + %ptr = select i1 %cond, i8 addrspace(5)* %val1, i8 addrspace(5)* %val2 + %load = load volatile i8, i8 addrspace(5)* %ptr + ret i8 %load +} + +declare void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)*, [32 x i8] addrspace(4)*, i64, i1)