diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -681,31 +681,6 @@ SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in) >; -// Permlane intrinsic that has either fetch invalid or bound control -// fields enabled. -class BoundControlOrFetchInvalidPermlane : - PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2, - node:$fi, node:$bc), - (permlane node:$vdst_in, node:$src0, node: - $src1, node:$src2, node:$fi, node:$bc)> { - let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 || - N->getConstantOperandVal(6) != 0; }]; - let GISelPredicateCode = [{ - return MI.getOperand(6).getImm() != 0 || - MI.getOperand(7).getImm() != 0; - }]; -} - -// Drop the input value if it won't be read. -class PermlaneDiscardVDstIn : GCNPat< - (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, - timm:$fi, timm:$bc), - (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc), - SCSrc_b32:$src1, 0, SCSrc_b32:$src2, - (IMPLICIT_DEF)) ->; - let SubtargetPredicate = isGFX10Plus in { let isCommutable = 1, isReMaterializable = 1 in { @@ -721,13 +696,6 @@ def : PermlanePat; def : PermlanePat; - def : PermlaneDiscardVDstIn< - BoundControlOrFetchInvalidPermlane, - V_PERMLANE16_B32_e64>; - def : PermlaneDiscardVDstIn< - BoundControlOrFetchInvalidPermlane, - V_PERMLANEX16_B32_e64>; - defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile, add>; defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile, sub>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -34,7 +34,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -64,7 +64,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -97,7 +97,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -156,7 +156,7 @@ ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() %tidy = call i32 @llvm.amdgcn.workitem.id.y() - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -209,7 +209,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -265,7 +265,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidy = call i32 @llvm.amdgcn.workitem.id.y() - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -295,7 +295,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -325,7 +325,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -355,7 +355,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1) + %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -385,7 +385,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -415,7 +415,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -448,7 +448,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -507,7 +507,7 @@ ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() %tidy = call i32 @llvm.amdgcn.workitem.id.y() - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -560,7 +560,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -616,7 +616,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidy = call i32 @llvm.amdgcn.workitem.id.y() - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -646,7 +646,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -676,7 +676,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -706,7 +706,7 @@ ; GFX11-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -735,7 +735,7 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -764,7 +764,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlane16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -821,7 +822,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -850,7 +851,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlane16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 true, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -879,7 +881,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlane16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -908,7 +911,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlane16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 true, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -937,7 +941,7 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -966,7 +970,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlanex16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -1023,7 +1028,7 @@ ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) + %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -1052,7 +1057,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlanex16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 true, i1 false) store i32 %v, ptr addrspace(1) %out ret void } @@ -1081,7 +1087,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlanex16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 false, i1 true) store i32 %v, ptr addrspace(1) %out ret void } @@ -1110,7 +1117,8 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tidx = call i32 @llvm.amdgcn.workitem.id.x() - %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1) + %undef = freeze i32 poison + %v = call i32 @llvm.amdgcn.permlanex16(i32 %undef, i32 %tidx, i32 %src1, i32 %src2, i1 true, i1 true) store i32 %v, ptr addrspace(1) %out ret void }