Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3959,6 +3959,21 @@ II->setOperand(0, UndefValue::get(Old->getType())); return II; } + case Intrinsic::amdgcn_permlane16: + case Intrinsic::amdgcn_permlanex16: { + // Discard vdst_in if it's not going to be read. + Value *VDstIn = II->getArgOperand(0); + if (isa(VDstIn)) + break; + + ConstantInt *FetchInvalid = cast(II->getArgOperand(4)); + ConstantInt *BoundCtrl = cast(II->getArgOperand(5)); + if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue()) + break; + + II->setArgOperand(0, UndefValue::get(VDstIn->getType())); + return II; + } case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: { // A constant value is trivially uniform. Index: llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll =================================================================== --- llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -2655,5 +2655,83 @@ ret void } -; CHECK: attributes [[CONVERGENT]] = { convergent } +; -------------------------------------------------------------------- +; llvm.amdgcn.permlane16 +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg) + +define amdgpu_kernel void @permlane16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlane16( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @permlane16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlane16_bound_ctrl( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.permlanex16 +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg) + +define amdgpu_kernel void @permlanex16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlanex16( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @permlanex16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlanex16_bound_ctrl( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) { +; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true) +; CHECK-NEXT: store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) + store i32 %res, i32 addrspace(1)* %out + ret void +} + +; CHECK: attributes [[CONVERGENT]] = { convergent }