InstCombine: fold redundant llvm.amdgcn.readfirstlane / llvm.amdgcn.readlane
chains.

When the source operand of a readfirstlane/readlane call is itself a
readfirstlane/readlane call in the same basic block, the outer call is
replaced by its source:

  readfirstlane (readfirstlane x)  -> readfirstlane x
  readlane (readfirstlane x), y    -> readfirstlane x
  readfirstlane (readlane x, y)    -> readlane x, y
  readlane (readlane x, y), y      -> readlane x, y

The folds are skipped when the source instruction lives in a different basic
block, because exec may not be the same between the def and the use. The
*_different_block tests below pin that negative case.

Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3781,6 +3781,30 @@
     // A constant value is trivially uniform.
     if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
       return replaceInstUsesWith(*II, C);
+
+    // The rest of these may not be safe if the exec may not be the same between
+    // the def and use.
+    Value *Src = II->getArgOperand(0);
+    Instruction *SrcInst = dyn_cast<Instruction>(Src);
+    if (SrcInst && SrcInst->getParent() != II->getParent())
+      break;
+
+    // readfirstlane (readfirstlane x) -> readfirstlane x
+    // readlane (readfirstlane x), y -> readfirstlane x
+    if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+      return replaceInstUsesWith(*II, Src);
+
+    if (IID == Intrinsic::amdgcn_readfirstlane) {
+      // readfirstlane (readlane x, y) -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+        return replaceInstUsesWith(*II, Src);
+    } else {
+      // readlane (readlane x, y), y -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+                  m_Value(), m_Specific(II->getArgOperand(1)))))
+        return replaceInstUsesWith(*II, Src);
+    }
+
     break;
   }
   case Intrinsic::stackrestore: {
Index: test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2462,6 +2462,63 @@
   ret void
 }
 
+define i32 @readfirstlane_idempotent(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
+  ret i32 %read2
+}
+
+define i32 @readfirstlane_readlane(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
+define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
+define i32 @readfirstlane_readlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.readlane
 ; --------------------------------------------------------------------
@@ -2491,6 +2548,74 @@
   ret void
 }
 
+define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+  ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
+; CHECK-LABEL: @readlane_idempotent_different_lanes(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
+  ret i32 %read1
+}
+
+define i32 @readlane_readfirstlane(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+  ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+  ret i32 %read1
+}
+
+
+define i32 @readlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.update.dpp.i32
 ; --------------------------------------------------------------------