Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -660,12 +660,9 @@ defm : IMAD32_Pats; def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> { - let Src0RC64 = VRegSrc_32; - let Src1RC64 = SCSrc_b32; - let Src2RC64 = SCSrc_b32; let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, - IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1, - IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, + IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, + IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasClamp = 0; let HasExtVOP3DPP = 0; Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -27,9 +27,8 @@ ; GCN-LABEL: {{^}}v_permlane16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out @@ -124,9 +123,8 @@ ; GCN-LABEL: {{^}}v_permlanex16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: 
v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out Index: llvm/test/MC/AMDGPU/gfx10_asm_vop3.s =================================================================== --- llvm/test/MC/AMDGPU/gfx10_asm_vop3.s +++ llvm/test/MC/AMDGPU/gfx10_asm_vop3.s @@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlane16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01] @@ -12830,6 +12833,9 @@ v_permlane16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03] +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00] @@ -12923,6 +12929,9 @@ v_permlanex16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01] @@ -12956,6 +12965,9 @@ v_permlanex16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03] +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00] Index: llvm/test/MC/AMDGPU/gfx11_asm_vop3.s =================================================================== --- 
llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -5287,6 +5287,12 @@ v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s3 // GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] @@ -5323,6 +5329,12 @@ v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] // GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] Index: llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -16044,6 +16044,9 @@ # GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03 @@ -16071,6 +16074,9 @@ # GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; 
encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00 @@ -16149,6 +16155,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03 @@ -16176,6 +16185,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00