Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3776,10 +3776,35 @@
     II->setOperand(0, UndefValue::get(Old->getType()));
     return II;
   }
+  case Intrinsic::amdgcn_readfirstlane:
   case Intrinsic::amdgcn_readlane: {
     // A constant value is trivially uniform.
     if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
       return replaceInstUsesWith(*II, C);
+
+    // The rest of these may not be safe if the exec may not be the same between
+    // the def and use.
+    Value *Src = II->getArgOperand(0);
+    Instruction *SrcInst = dyn_cast<Instruction>(Src);
+    if (SrcInst && SrcInst->getParent() != II->getParent())
+      break;
+
+    // readfirstlane (readfirstlane x) -> readfirstlane x
+    // readlane (readfirstlane x), y -> readfirstlane x
+    if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+      return replaceInstUsesWith(*II, Src);
+
+    if (IID == Intrinsic::amdgcn_readfirstlane) {
+      // readfirstlane (readlane x, y) -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+        return replaceInstUsesWith(*II, Src);
+    } else {
+      // readlane (readlane x, y), y -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+                         m_Value(), m_Specific(II->getArgOperand(1)))))
+        return replaceInstUsesWith(*II, Src);
+    }
+
     break;
   }
   case Intrinsic::stackrestore: {
Index: test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2462,6 +2462,46 @@
   ret void
 }
 
+define i32 @readfirstlane_idempotent(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
+  ret i32 %read2
+}
+
+define i32 @readfirstlane_readlane(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+  ret i32 %read1
+}
+
+; The folds are skipped when the source call lives in a different block.
+define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.readlane
 ; --------------------------------------------------------------------
@@ -2491,6 +2531,37 @@
   ret void
 }
 
+define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+  ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
+; CHECK-LABEL: @readlane_idempotent_different_lanes(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
+  ret i32 %read1
+}
+
+define i32 @readlane_readfirstlane(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 5)
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 5)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.update.dpp.i32
 ; --------------------------------------------------------------------