diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/PatternMatch.h"
@@ -1571,6 +1572,56 @@
   if (!OtherBr || BBI == OtherBB->begin())
     return false;
 
+  // Returns true if OtherStore can be merged with SI: it must write through
+  // the same pointer operand, agree with SI on volatility, alignment, atomic
+  // ordering and sync scope, and store a value that either already has SI's
+  // value type or can be converted to it by a bitcast or no-op pointer cast.
+  // This is a pure query and never modifies the IR.
+  auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
+    if (!OtherStore ||
+        OtherStore->getPointerOperand() != SI.getPointerOperand())
+      return false;
+
+    // Stores that differ in volatility, alignment, ordering or sync scope
+    // are never merged, whether or not a cast is needed.
+    if (OtherStore->isVolatile() != SI.isVolatile() ||
+        OtherStore->getAlign() != SI.getAlign() ||
+        OtherStore->getOrdering() != SI.getOrdering() ||
+        OtherStore->getSyncScopeID() != SI.getSyncScopeID())
+      return false;
+
+    Type *SIVTy = SI.getValueOperand()->getType();
+    Type *OSVTy = OtherStore->getValueOperand()->getType();
+    if (SIVTy == OSVTy)
+      return true;
+
+    // Vector value types are deliberately left alone. For everything else
+    // defer to CastInst: comparing getPrimitiveSizeInBits() is not enough,
+    // since it is 0 for every pointer type and would therefore accept
+    // pointers in different address spaces.
+    if (SIVTy->isVectorTy() || OSVTy->isVectorTy())
+      return false;
+    return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL);
+  };
+
+  // Rewrites a store accepted by OtherStoreIsMergeable so that its value
+  // operand has SI's value type. The store is updated in place (nothing is
+  // erased), so it keeps its position, alignment, ordering and metadata and
+  // iterators into its block stay valid.
+  // NOTE(review): if a later check in this function can still bail out, the
+  // cast is left behind; confirm that is acceptable or sink this call.
+  auto CoerceOtherStoreValue = [&](StoreInst *OtherStore) {
+    Type *SIVTy = SI.getValueOperand()->getType();
+    if (OtherStore->getValueOperand()->getType() == SIVTy)
+      return;
+    IRBuilderBase::InsertPointGuard Guard(Builder);
+    Builder.SetInsertPoint(OtherStore);
+    Value *Cast =
+        Builder.CreateBitOrPointerCast(OtherStore->getValueOperand(), SIVTy);
+    replaceOperand(*OtherStore, 0, Cast);
+    MadeIRChange = true;
+  };
+
   // If the other block ends in an unconditional branch, check for the 'if then
   // else' case. There is an instruction before the branch.
   StoreInst *OtherStore = nullptr;
@@ -1585,9 +1636,9 @@
     }
     // If this isn't a store, isn't a store to the same location, or is not the
     // right kind of store, bail out.
     OtherStore = dyn_cast<StoreInst>(BBI);
-    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
-        !SI.isSameOperationAs(OtherStore))
+    if (!OtherStoreIsMergeable(OtherStore))
       return false;
+    CoerceOtherStoreValue(OtherStore);
   } else {
     // Otherwise, the other block ended with a conditional branch. If one of the
@@ -1601,12 +1652,12 @@
     // lives in OtherBB.
     for (;; --BBI) {
       // Check to see if we find the matching store.
       if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
-        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
-            !SI.isSameOperationAs(OtherStore))
+        if (!OtherStoreIsMergeable(OtherStore))
           return false;
+        CoerceOtherStoreValue(OtherStore);
         break;
       }
       // If we find something that may be using or overwriting the stored
       // value, or if we run out of instructions, we can't do the transform.
       if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
--- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
+++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
@@ -71,3 +71,149 @@
 bb12:                                             ; preds = %bb10, %bb9
   ret void
 }
+
+define half @diff_types_same_width_merge(i1 %cond, half %a, i16 %b) {
+; CHECK-LABEL: @diff_types_same_width_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       BB0:
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       BB1:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16 [[B:%.*]] to half
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB1]] ], [ [[A:%.*]], [[BB0]] ]
+; CHECK-NEXT:    ret half [[STOREMERGE]]
+;
+entry:
+  %alloca = alloca half
+  br i1 %cond, label %BB0, label %BB1
+BB0:
+  store half %a, ptr %alloca
+  br label %sink
+BB1:
+  store i16 %b, ptr %alloca
+  br label %sink
+sink:
+  %val = load half, ptr %alloca
+  ret half %val
+}
+
+define i32 @diff_types_diff_width_no_merge(i1 %cond, i32 %a, i64 %b) {
+; CHECK-LABEL: @diff_types_diff_width_no_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    store i32 [[A:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    store i64 [[B:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+entry:
+  %alloca = alloca i64
+  br i1 %cond, label %A, label %B
+A:
+  store i32 %a, ptr %alloca
+  br label %sink
+B:
+  store i64 %b, ptr %alloca
+  br label %sink
+sink:
+  %val = load i32, ptr %alloca
+  ret i32 %val
+}
+
+define <4 x i32> @vec_no_merge(i1 %cond, <2 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_no_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i64, align 16
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    store <2 x i32> [[A:%.*]], ptr [[ALLOCA]], align 16
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    store <4 x i32> [[B:%.*]], ptr [[ALLOCA]], align 16
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[VAL:%.*]] = load <4 x i32>, ptr [[ALLOCA]], align 16
+; CHECK-NEXT:    ret <4 x i32> [[VAL]]
+;
+entry:
+  %alloca = alloca i64
+  br i1 %cond, label %A, label %B
+A:
+  store <2 x i32> %a, ptr %alloca
+  br label %sink
+B:
+  store <4 x i32> %b, ptr %alloca
+  br label %sink
+sink:
+  %val = load <4 x i32>, ptr %alloca
+  ret <4 x i32> %val
+}
+
+%struct.half = type { half };
+
+define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) {
+; CHECK-LABEL: @one_elem_struct_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       BB0:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       BB1:
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ]
+; CHECK-NEXT:    [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0
+; CHECK-NEXT:    ret [[STRUCT_HALF]] [[VAL1]]
+;
+entry:
+  %alloca = alloca i64
+  br i1 %cond, label %BB0, label %BB1
+BB0:
+  store %struct.half %a, ptr %alloca
+  br label %sink
+BB1:
+  store half %b, ptr %alloca
+  br label %sink
+sink:
+  %val = load %struct.half, ptr %alloca
+  ret %struct.half %val
+}
+
+%struct.tup = type { half, i32 };
+
+define %struct.tup @multi_elem_struct_no_merge(i1 %cond, %struct.tup %a, half %b) {
+; CHECK-LABEL: @multi_elem_struct_no_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       A:
+; CHECK-NEXT:    store [[STRUCT_TUP:%.*]] [[A:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    store half [[B:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[VAL:%.*]] = load [[STRUCT_TUP]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT:    ret [[STRUCT_TUP]] [[VAL]]
+;
+entry:
+  %alloca = alloca i64
+  br i1 %cond, label %A, label %B
+A:
+  store %struct.tup %a, ptr %alloca
+  br label %sink
+B:
+  store half %b, ptr %alloca
+  br label %sink
+sink:
+  %val = load %struct.tup, ptr %alloca
+  ret %struct.tup %val
+}