# Review notes (free text ahead of the first "Index:" header; not part of any hunk).
# Purpose: move v_cndmask_b32 off the ad-hoc V_CNDMASK multiclass / VOP_CNDMASK profile and onto a
#   dedicated VOP2e profile (VOP2e_I32_I32_I32_I1) with VOP2e_Helper / VOP2eInst / VOP3e_2_3_m, so the
#   e32 form is written with an explicit "vcc" operand and the e64 form takes the condition as $src2.
# VOP2e_Helper: the profile's only outs are $vdst and VCC is an implicit *use* of the e32 encoding, so
#   the helper lists only Write32Bit in SchedRW and does not add VCC to Defs (there is no sdst/VCC def).
# NOTE(review): several template argument lists look stripped by extraction (e.g. "class VOP3e_Real_si op,",
#   "VOP3Common ,", "defm _e32 : VOP2_m ;") - restore them from the upstream revision before applying; TODO confirm.
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -1475,6 +1475,19 @@ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } +// Read in from vcc or arbitrary SGPR +def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { + let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. + let Asm32 = "$vdst, $src0, $src1, vcc"; + let Asm64 = "$vdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC:$vdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); +} + class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod"; @@ -1516,11 +1529,6 @@ def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; -def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); - let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2); - let Asm64 = "$vdst, $src0, $src1, $src2"; -} def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { @@ -1863,6 +1871,26 @@ let DisableDecoder = DisableVIDecoder; } +class VOP3e_Real_si op, dag outs, dag ins, string asm, string opName, + bit HasMods = 0, bit VOP3Only = 0> : + VOP3Common , + VOP3e , + SIMCInstr { + let AssemblerPredicates = [isSICI]; + let DecoderNamespace = "SICI"; + let DisableDecoder = DisableSIDecoder; +} + +class VOP3e_Real_vi op, dag outs, dag ins, string asm, string opName, + bit HasMods = 0, bit VOP3Only = 0> : + VOP3Common , + VOP3e_vi , + SIMCInstr 
{ + let AssemblerPredicates = [isVI]; + let DecoderNamespace = "VI"; + let DisableDecoder = DisableVIDecoder; +} + multiclass VOP3_m pattern, string opName, int NumSrcArgs, bit HasMods = 1, bit VOP3Only = 0> { @@ -1941,6 +1969,19 @@ VOP3DisableFields<1, useSrc2Input, HasMods>; } +// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32. +multiclass VOP3e_2_3_m pattern, string opName, string revOp, + bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> { + def "" : VOP3_Pseudo ; + + def _si : VOP3e_Real_si , + VOP3DisableFields<1, useSrc2Input, HasMods>; + + def _vi : VOP3e_Real_vi , + VOP3DisableFields<1, useSrc2Input, HasMods>; +} + multiclass VOP3_C_m pattern, string opName, bit HasMods, bit defExec, @@ -2067,6 +2108,33 @@ opName, revOp, P.HasModifiers>; } +multiclass VOP2e_Helper pat32, list pat64, + string revOp, bit useSGPRInput> { + + let SchedRW = [Write32Bit] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { + defm _e32 : VOP2_m ; + } + + defm _e64 : VOP3e_2_3_m ; + } +} + +multiclass VOP2eInst : VOP2e_Helper < + op, opName, P, [], + !if(P.HasModifiers, + [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, + i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], + [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), + revOp, !eq(P.NumSrcArgs, 3) +>; + multiclass VOP2b_Helper pat32, list pat64, string revOp, bit useSGPRInput> { Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -1473,15 +1473,9 @@ // VOP2 Instructions //===----------------------------------------------------------------------===// -multiclass V_CNDMASK { - defm _e32 : VOP2_m ; - - defm _e64 : VOP3_m < - op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64, - name#!cast(VOP_CNDMASK.Asm64), [], name, 
3, 0>; -} - -defm V_CNDMASK_B32 : V_CNDMASK, "v_cndmask_b32">; +defm V_CNDMASK_B32 : VOP2eInst , "v_cndmask_b32", + VOP2e_I32_I32_I32_I1 +>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2Inst , "v_add_f32", Index: llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll @@ -25,8 +25,7 @@ ; SI-DAG: cndmask_b32 ; SI-DAG: v_cmp_lt_f64 ; SI-DAG: v_cmp_lg_f64 -; SI-DAG: s_and_b64 -; SI: v_cndmask_b32 +; SI-DAG: v_cndmask_b32 ; SI: v_cndmask_b32 ; SI: v_add_f64 ; SI: s_endpgm Index: llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -25,7 +25,7 @@ ; GCN-DAG: v_cmp_lt_u64 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} -; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]], +; GCN: v_cndmask_b32_e{{32|64}} [[SIGN_SEL:v[0-9]+]], ; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]] define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/trunk/test/MC/AMDGPU/vop2-err.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop2-err.s +++ llvm/trunk/test/MC/AMDGPU/vop2-err.s @@ -8,6 +8,9 @@ v_mul_i32_i24 v1, v2, 100 // CHECK: error: invalid operand for instruction +v_cndmask_b32 v1, v2, v3 +// CHECK: error: too few operands for instruction + //===----------------------------------------------------------------------===// // _e32 checks //===----------------------------------------------------------------------===// @@ -20,6 +23,9 @@ v_mul_i32_i24_e32 v1, v2, s3 // CHECK: error: invalid operand for instruction +v_cndmask_b32_e32 v1, v2, v3, s[0:1] +// CHECK: error: invalid operand for instruction + 
//===----------------------------------------------------------------------===// // _e64 checks //===----------------------------------------------------------------------===// Index: llvm/trunk/test/MC/AMDGPU/vop2.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop2.s +++ llvm/trunk/test/MC/AMDGPU/vop2.s @@ -98,8 +98,11 @@ // Instructions //===----------------------------------------------------------------------===// -// GCN: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00] -v_cndmask_b32 v1, v2, v3 +// GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00] +v_cndmask_b32 v1, v2, v3, vcc + +// GCN: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00] +v_cndmask_b32_e32 v1, v2, v3, vcc // SICI: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02] // VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00] Index: llvm/trunk/test/MC/AMDGPU/vop3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3.s +++ llvm/trunk/test/MC/AMDGPU/vop3.s @@ -202,6 +202,14 @@ // SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00] // VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00] +v_cndmask_b32_e64 v1, v3, v5, s[4:5] +// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00] +// VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00] + +v_cndmask_b32_e64 v1, v3, v5, vcc +// SICI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0xaa,0x01] +// VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01] + //TODO: readlane, writelane v_add_f32 v1, v3, s5 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop2_vi.txt =================================================================== 
--- llvm/trunk/test/MC/Disassembler/AMDGPU/vop2_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop2_vi.txt @@ -1,5 +1,8 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI +# VI: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00] +0x02 0x07 0x02 0x00 + # VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00] 0x01 0x00 0x89 0xd2 0x02 0x07 0x00 0x00 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -111,6 +111,9 @@ # VI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00] 0x01 0x00 0x00 0xd1 0x03 0x0b 0x12 0x00 +# VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01] +0x01 0x00 0x00 0xd1 0x03 0x0b 0xaa 0x01 + # VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00] 0x01 0x00 0x01 0xd1 0x03 0x0b 0x00 0x00