diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" @@ -1611,6 +1612,17 @@ if (!OtherBr || BBI == OtherBB->begin()) return false; + auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool { + if (!OtherStore || + OtherStore->getPointerOperand() != SI.getPointerOperand()) + return false; + + auto *SIVTy = SI.getValueOperand()->getType(); + auto *OSVTy = OtherStore->getValueOperand()->getType(); + return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) && + SI.hasSameSpecialState(OtherStore); + }; + // If the other block ends in an unconditional branch, check for the 'if then // else' case. There is an instruction before the branch. StoreInst *OtherStore = nullptr; @@ -1626,8 +1638,7 @@ // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); - if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - !SI.isSameOperationAs(OtherStore)) + if (!OtherStoreIsMergeable(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -1642,8 +1653,7 @@ for (;; --BBI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { - if (OtherStore->getOperand(1) != SI.getOperand(1) || - !SI.isSameOperationAs(OtherStore)) + if (!OtherStoreIsMergeable(OtherStore)) return false; break; } @@ -1664,14 +1674,17 @@ } // Insert a PHI node now if we need it. 
- Value *MergedVal = OtherStore->getOperand(0); + Value *MergedVal = OtherStore->getValueOperand(); // The debug locations of the original instructions might differ. Merge them. DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc()); - if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); - PN->addIncoming(SI.getOperand(0), SI.getParent()); - PN->addIncoming(OtherStore->getOperand(0), OtherBB); + if (MergedVal != SI.getValueOperand()) { + PHINode *PN = + PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge"); + PN->addIncoming(SI.getValueOperand(), SI.getParent()); + Builder.SetInsertPoint(OtherStore); + PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()), + OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); PN->setDebugLoc(MergedLoc); } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/merging-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/AMDGPU/merging-stores-into-successor.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/merging-stores-into-successor.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -passes=instcombine -o - %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target triple = "amdgcn-amd-amdhsa" + +define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) { +; CHECK-LABEL: define ptr @inttoptr_merge +; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: br label 
[[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ] +; CHECK-NEXT: ret ptr [[STOREMERGE]] +; +entry: + %alloca = alloca ptr + br i1 %cond, label %BB0, label %BB1 +BB0: + store i64 %a, ptr %alloca + br label %sink +BB1: + store ptr %b, ptr %alloca + br label %sink +sink: + %val = load ptr, ptr %alloca + ret ptr %val +} diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -71,3 +71,205 @@ bb12: ; preds = %bb10, %bb9 ret void } + +define half @diff_types_same_width_merge(i1 %cond, half %a, i16 %b) { +; CHECK-LABEL: @diff_types_same_width_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16 [[B:%.*]] to half +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB1]] ], [ [[A:%.*]], [[BB0]] ] +; CHECK-NEXT: ret half [[STOREMERGE]] +; +entry: + %alloca = alloca half + br i1 %cond, label %BB0, label %BB1 +BB0: + store half %a, ptr %alloca + br label %sink +BB1: + store i16 %b, ptr %alloca + br label %sink +sink: + %val = load half, ptr %alloca + ret half %val +} + +define i32 @diff_types_diff_width_no_merge(i1 %cond, i32 %a, i64 %b) { +; CHECK-LABEL: @diff_types_diff_width_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store i32 [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store i64 [[B:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK]] 
+; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 8 +; CHECK-NEXT: ret i32 [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store i32 %a, ptr %alloca + br label %sink +B: + store i64 %b, ptr %alloca + br label %sink +sink: + %val = load i32, ptr %alloca + ret i32 %val +} + +define <4 x i32> @vec_no_merge(i1 %cond, <2 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @vec_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 16 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store <2 x i32> [[A:%.*]], ptr [[ALLOCA]], align 16 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store <4 x i32> [[B:%.*]], ptr [[ALLOCA]], align 16 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load <4 x i32>, ptr [[ALLOCA]], align 16 +; CHECK-NEXT: ret <4 x i32> [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store <2 x i32> %a, ptr %alloca + br label %sink +B: + store <4 x i32> %b, ptr %alloca + br label %sink +sink: + %val = load <4 x i32>, ptr %alloca + ret <4 x i32> %val +} + +%struct.half = type { half }; + +define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) { +; CHECK-LABEL: @one_elem_struct_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ] +; CHECK-NEXT: [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0 +; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %BB0, label %BB1 +BB0: + store %struct.half %a, ptr %alloca + br label %sink +BB1: + store half 
%b, ptr %alloca + br label %sink +sink: + %val = load %struct.half, ptr %alloca + ret %struct.half %val +} + +%struct.tup = type { half, i32 }; + +define %struct.tup @multi_elem_struct_no_merge(i1 %cond, %struct.tup %a, half %b) { +; CHECK-LABEL: @multi_elem_struct_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store [[STRUCT_TUP:%.*]] [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store half [[B:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load [[STRUCT_TUP]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: ret [[STRUCT_TUP]] [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store %struct.tup %a, ptr %alloca + br label %sink +B: + store half %b, ptr %alloca + br label %sink +sink: + %val = load %struct.tup, ptr %alloca + ret %struct.tup %val +} + +define i16 @same_types_diff_align_no_merge(i1 %cond, i16 %a, i16 %b) { +; CHECK-LABEL: @same_types_diff_align_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i16, align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: store i16 [[B:%.*]], ptr [[ALLOCA]], align 4 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ALLOCA]], align 4 +; CHECK-NEXT: ret i16 [[VAL]] +; +entry: + %alloca = alloca i16, align 4 + br i1 %cond, label %BB0, label %BB1 +BB0: + store i16 %a, ptr %alloca, align 8 + br label %sink +BB1: + store i16 %b, ptr %alloca, align 4 + br label %sink +sink: + %val = load i16, ptr %alloca + ret i16 %val +} + +define i64 @ptrtoint_merge(i1 %cond, i64 %a, ptr %b) { +; CHECK-LABEL: @ptrtoint_merge( +; CHECK-NEXT: entry: 
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ] +; CHECK-NEXT: ret i64 [[STOREMERGE]] +; +entry: + %alloca = alloca ptr + br i1 %cond, label %BB0, label %BB1 +BB0: + store i64 %a, ptr %alloca + br label %sink +BB1: + store ptr %b, ptr %alloca + br label %sink +sink: + %val = load i64, ptr %alloca + ret i64 %val +}