Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
@@ -262,8 +262,8 @@
   "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
 >;
 
-def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
-  "HasSDWAClampVOPC",
+def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
+  "HasSDWAOutModsVOPC",
   "true",
   "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
 >;
@@ -452,7 +452,7 @@
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
    FeatureScalarStores, FeatureInv2PiInlineImm,
-   FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
+   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
   ]
 >;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -153,7 +153,7 @@
   bool HasSDWAScalar;
   bool HasSDWASdst;
   bool HasSDWAMac;
-  bool HasSDWAClampVOPC;
+  bool HasSDWAOutModsVOPC;
   bool HasDPP;
   bool FlatAddressSpace;
   bool FlatInstOffsets;
@@ -452,8 +452,8 @@
     return HasSDWAMac;
   }
 
-  bool hasSDWAClampVOPC() const {
-    return HasSDWAClampVOPC;
+  bool hasSDWAOutModsVOPC() const {
+    return HasSDWAOutModsVOPC;
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -128,7 +128,7 @@
     HasSDWAScalar(false),
     HasSDWASdst(false),
     HasSDWAMac(false),
-    HasSDWAClampVOPC(false),
+    HasSDWAOutModsVOPC(false),
     HasDPP(false),
     FlatAddressSpace(false),
     FlatInstOffsets(false),
Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -626,7 +626,9 @@
   using namespace AMDGPU::SDWA;
 
   if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
-    if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+    // XXX: static_cast<int> is needed to avoid a spurious warning:
+    // comparison with unsigned is always true
+    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
       return createRegOperand(getVgprClassId(Width),
                               Val - SDWA9EncValues::SRC_VGPR_MIN);
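Note on the disassembler hunk above: the cast only matters because SDWA9EncValues::SRC_VGPR_MIN is presumably zero, so after the usual arithmetic conversions the first comparison becomes "0u <= Val", which is always true and triggers a tautological-comparison diagnostic (-Wtype-limits in GCC, the -Wtautological-compare family in Clang). A minimal standalone sketch of the problem and the fix, with illustrative stand-in constants rather than the real encoding values (Val is a narrow encoded field in the real code, so the cast cannot overflow):

    // Sketch only: MIN/MAX stand in for SDWA9EncValues::SRC_VGPR_MIN/MAX.
    constexpr int SRC_VGPR_MIN = 0;
    constexpr int SRC_VGPR_MAX = 255;

    bool isVGPREncoding(unsigned Val) {
      // Without the cast, SRC_VGPR_MIN is converted to unsigned and the first
      // comparison is trivially true, so compilers warn. Casting Val to int
      // keeps both operands signed and silences the warning.
      return SRC_VGPR_MIN <= static_cast<int>(Val) &&
             Val <= static_cast<unsigned>(SRC_VGPR_MAX);
    }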
"Clamp not allowed in VOPC SDWA instructions on VI"; return false; } + + // No omod allowed on GFX9 for VOPC + const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; + return false; + } } } } Index: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -627,10 +627,13 @@ return false; } - if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp)) + if (!ST.hasSDWAOutModsVOPC() && + (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) || + TII->hasModifiersSet(MI, AMDGPU::OpName::omod))) return false; - } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { + } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) || + !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { return false; } @@ -649,25 +652,24 @@ SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (!Dst && !TII->isVOPC(MI)) - return false; - const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { assert(Dst && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); SDWAInst.add(*Dst); + } else { + assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.addReg(AMDGPU::VCC, RegState::Define); } // Copy src0, initialize src0_modifiers. 
Index: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -627,10 +627,13 @@
       return false;
     }
 
-    if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+    if (!ST.hasSDWAOutModsVOPC() &&
+        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
       return false;
 
-  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
+             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
     return false;
   }
 
@@ -649,25 +652,24 @@
   SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
   assert(SDWAOpcode != -1);
 
-  // Copy dst, if it is present in original then should also be present in SDWA
-  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-  if (!Dst && !TII->isVOPC(MI))
-    return false;
-
   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
 
   // Create SDWA version of instruction MI and initialize its operands
   MachineInstrBuilder SDWAInst =
     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
 
+  // Copy dst; if it is present in the original then it should also be present in SDWA
+  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   if (Dst) {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
     SDWAInst.add(*Dst);
-  } else {
-    Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
     assert(Dst &&
            AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
     SDWAInst.add(*Dst);
+  } else {
+    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions have src0 and
@@ -714,20 +716,22 @@
   }
 
   // Copy omod if present, initialize otherwise if needed
-  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
-  if (OMod) {
-    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
-    SDWAInst.add(*OMod);
-  } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
-    SDWAInst.addImm(0);
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+    MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+    if (OMod) {
+      SDWAInst.add(*OMod);
+    } else {
+      SDWAInst.addImm(0);
+    }
   }
 
-  // Initialize dst_sel and dst_unused if present
-  if (Dst) {
-    assert(
-      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
-      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+  // Initialize dst_sel if present
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
     SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+  }
+
+  // Initialize dst_unused if present
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
     SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
   }
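The reworked operand copying above leans on one idiom throughout: ask the opcode's operand table whether a named operand slot exists at all (AMDGPU::getNamedOperandIdx returns -1 when it does not), and only then append a value, because operand order is fixed by the MCInstrDesc and an unconditional append would shift every later operand. A reduced sketch of that idiom, with an invented helper name (getNamedOperandIdx is declared in the target-internal Utils/AMDGPUBaseInfo.h):

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    // Append a default immediate for an optional SDWA operand, but only when
    // the SDWA opcode actually defines that operand slot.
    static void addDefaultImm(llvm::MachineInstrBuilder &SDWAInst,
                              unsigned SDWAOpcode, unsigned OpName,
                              int64_t Value) {
      if (llvm::AMDGPU::getNamedOperandIdx(SDWAOpcode, OpName) != -1)
        SDWAInst.addImm(Value);
    }

With such a helper, the dst_sel and dst_unused tails of the function would collapse to two calls, e.g. addDefaultImm(SDWAInst, SDWAOpcode, AMDGPU::OpName::dst_sel, AMDGPU::SDWA::SdwaSel::DWORD).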
Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -0,0 +1,446 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=GCN %s
+
+# GFX89-LABEL: {{^}}name: vop1_instructions
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec
+
+
+---
+name: vop1_instructions
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vreg_64 }
+  - { id: 2, class: sreg_64 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sreg_32_xm0 }
+  - { id: 6, class: sreg_32_xm0 }
+  - { id: 7, class: sreg_32_xm0 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vgpr_32 }
+  - { id: 11, class: vgpr_32 }
+  - { id: 12, class: vgpr_32 }
+  - { id: 13, class: vgpr_32 }
+  - { id: 14, class: vgpr_32 }
+  - { id: 15, class: vgpr_32 }
+  - { id: 16, class: vgpr_32 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: vgpr_32 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vgpr_32 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vgpr_32 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vgpr_32 }
+  - { id: 29, class: vgpr_32 }
+  - { id: 30, class: vgpr_32 }
+  - { id: 31, class: vgpr_32 }
+  - { id: 32, class: vgpr_32 }
+  - { id: 33, class: vgpr_32 }
+  - { id: 34, class: vgpr_32 }
+  - { id: 35, class: vgpr_32 }
+  - { id: 36, class: vgpr_32 }
+  - { id: 37, class: vgpr_32 }
+  - { id: 38, class: vgpr_32 }
+  - { id: 39, class: vgpr_32 }
+  - { id: 40, class: vgpr_32 }
+  - { id: 41, class: vgpr_32 }
+  - { id: 42, class: vgpr_32 }
+  - { id: 43, class: vgpr_32 }
+  - { id: 44, class: vgpr_32 }
+  - { id: 45, class: vgpr_32 }
+  - { id: 46, class: vgpr_32 }
+  - { id: 47, class: vgpr_32 }
+  - { id: 48, class: vgpr_32 }
+  - { id: 100, class: vgpr_32 }
+body: |
+  bb.0:
+    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+    %2 = COPY %sgpr30_sgpr31
+    %1 = COPY %vgpr2_vgpr3
+    %0 = COPY %vgpr0_vgpr1
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+    %5 = S_MOV_B32 65535
+    %6 = S_MOV_B32 65535
+
+    %10 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+    %11 = V_MOV_B32_e32 %10, implicit %exec
+    %12 = V_LSHLREV_B32_e64 16, %11, implicit %exec
+    %14 = V_FRACT_F32_e32 123, implicit %exec
+    %15 = V_LSHLREV_B32_e64 16, %14, implicit %exec
+    %16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
+    %17 = V_SIN_F32_e32 %16, implicit %exec
+    %18 = V_LSHLREV_B32_e64 16, %17, implicit %exec
+    %19 = V_LSHRREV_B32_e64 16, %18, implicit %exec
+    %20 = V_CVT_U32_F32_e32 %19, implicit %exec
+    %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
+    %23 = V_CVT_F32_I32_e32 123, implicit %exec
+    %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
+
+    %25 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+    %26 = V_MOV_B32_e64 %25, implicit %exec
+    %26 = V_LSHLREV_B32_e64 16, %26, implicit %exec
+    %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit %exec
+    %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
+    %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
+    %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit %exec
+    %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
+    %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
+    %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit %exec
+    %34 = V_LSHLREV_B32_e64 16, %33, implicit %exec
+    %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit %exec
+    %36 = V_LSHLREV_B32_e64 16, %35, implicit %exec
+
+
+    %37 = V_LSHRREV_B32_e64 16, %36, implicit %exec
+    %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit %exec
+    %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
+    %40 = V_LSHRREV_B32_e64 16, %39, implicit %exec
+    %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit %exec
+    %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
+    %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
+    %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit %exec
+    %45 = V_LSHLREV_B32_e64 16, %44, implicit %exec
+    %46 = V_LSHRREV_B32_e64 16, %45, implicit %exec
+    %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit %exec
+    %48 = V_LSHLREV_B32_e64 16, %47, implicit %exec
+
+
+    %100 = V_MOV_B32_e32 %48, implicit %exec
+
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+    %sgpr30_sgpr31 = COPY %2
+    S_SETPC_B64_return %sgpr30_sgpr31
+
+...
+---
+# GCN-LABEL: {{^}}name: vop2_instructions
+
+
+# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+
+
+# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+
+# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+
+name: vop2_instructions
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vreg_64 }
+  - { id: 2, class: sreg_64 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sreg_32_xm0 }
+  - { id: 6, class: sreg_32_xm0 }
+  - { id: 7, class: sreg_32_xm0 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vgpr_32 }
+  - { id: 11, class: vgpr_32 }
+  - { id: 12, class: vgpr_32 }
+  - { id: 13, class: vgpr_32 }
+  - { id: 14, class: vgpr_32 }
+  - { id: 15, class: vgpr_32 }
+  - { id: 16, class: vgpr_32 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: vgpr_32 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vgpr_32 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vgpr_32 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vgpr_32 }
+  - { id: 29, class: vgpr_32 }
+  - { id: 30, class: vgpr_32 }
+  - { id: 31, class: vgpr_32 }
+  - { id: 32, class: vgpr_32 }
+  - { id: 33, class: vgpr_32 }
+  - { id: 34, class: vgpr_32 }
+  - { id: 35, class: vgpr_32 }
+  - { id: 36, class: vgpr_32 }
+  - { id: 37, class: vgpr_32 }
+  - { id: 38, class: vgpr_32 }
+  - { id: 39, class: vgpr_32 }
+  - { id: 40, class: vgpr_32 }
+  - { id: 41, class: vgpr_32 }
+  - { id: 42, class: vgpr_32 }
+  - { id: 43, class: vgpr_32 }
+  - { id: 44, class: vgpr_32 }
+  - { id: 45, class: vgpr_32 }
+  - { id: 46, class: vgpr_32 }
+  - { id: 47, class: vgpr_32 }
+  - { id: 48, class: vgpr_32 }
+  - { id: 49, class: vgpr_32 }
+  - { id: 50, class: vgpr_32 }
+  - { id: 51, class: vgpr_32 }
+  - { id: 52, class: vgpr_32 }
+  - { id: 53, class: vgpr_32 }
+  - { id: 54, class: vgpr_32 }
+  - { id: 55, class: vgpr_32 }
+  - { id: 56, class: vgpr_32 }
+  - { id: 57, class: vgpr_32 }
+  - { id: 58, class: vgpr_32 }
+  - { id: 59, class: vgpr_32 }
+  - { id: 60, class: vgpr_32 }
+  - { id: 100, class: vgpr_32 }
+body: |
+  bb.0:
+    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+    %2 = COPY %sgpr30_sgpr31
+    %1 = COPY %vgpr2_vgpr3
+    %0 = COPY %vgpr0_vgpr1
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+    %5 = S_MOV_B32 65535
+    %6 = S_MOV_B32 65535
+
+    %11 = V_LSHRREV_B32_e64 16, %3, implicit %exec
+    %12 = V_AND_B32_e32 %6, %11, implicit %exec
+    %13 = V_LSHLREV_B32_e64 16, %12, implicit %exec
+    %14 = V_LSHRREV_B32_e64 16, %13, implicit %exec
+    %15 = V_BFE_U32 %13, 8, 8, implicit %exec
+    %16 = V_ADD_F32_e32 %14, %15, implicit %exec
+    %17 = V_LSHLREV_B32_e64 16, %16, implicit %exec
+    %18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
+    %19 = V_BFE_U32 %17, 8, 8, implicit %exec
+    %20 = V_SUB_F16_e32 %18, %19, implicit %exec
+    %21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
+    %22 = V_BFE_U32 %20, 8, 8, implicit %exec
+    %23 = V_MAC_F32_e32 %21, %22, %22, implicit %exec
+    %24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
+    %25 = V_LSHRREV_B32_e64 16, %24, implicit %exec
+    %26 = V_BFE_U32 %24, 8, 8, implicit %exec
+    %27 = V_MAC_F16_e32 %25, %26, %26, implicit %exec
+    %28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
+
+    %29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
+    %30 = V_AND_B32_e64 23, %29, implicit %exec
+    %31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
+    %32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
+    %33 = V_BFE_U32 %31, 8, 8, implicit %exec
+    %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit %exec
+    %35 = V_LSHLREV_B32_e64 16, %34, implicit %exec
+    %37 = V_BFE_U32 %35, 8, 8, implicit %exec
+    %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit %exec
+    %39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
+    %40 = V_BFE_U32 %39, 8, 8, implicit %exec
+    %41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit %exec
+    %42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
+    %43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
+    %44 = V_BFE_U32 %42, 8, 8, implicit %exec
+    %45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit %exec
+    %46 = V_LSHLREV_B32_e64 16, %45, implicit %exec
+
+    %47 = V_LSHRREV_B32_e64 16, %46, implicit %exec
+    %48 = V_BFE_U32 %46, 8, 8, implicit %exec
+    %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit %exec
+    %50 = V_LSHLREV_B32_e64 16, %49, implicit %exec
+    %51 = V_BFE_U32 %50, 8, 8, implicit %exec
+    %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit %exec
+    %53 = V_LSHLREV_B32_e64 16, %52, implicit %exec
+    %54 = V_BFE_U32 %53, 8, 8, implicit %exec
+    %55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit %exec
+    %56 = V_LSHLREV_B32_e64 16, %55, implicit %exec
+    %57 = V_LSHRREV_B32_e64 16, %56, implicit %exec
+    %58 = V_BFE_U32 %56, 8, 8, implicit %exec
+    %59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit %exec
+    %60 = V_LSHLREV_B32_e64 16, %59, implicit %exec
+
+    %100 = V_MOV_B32_e32 %60, implicit %exec
+
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+    %sgpr30_sgpr31 = COPY %2
+    S_SETPC_B64_return %sgpr30_sgpr31
+
+...
+---
+
+# GCN-LABEL: {{^}}name: vopc_instructions
+
+# GFX89: %{{[0-9]+}} = V_MOV_B32_e32 123, implicit %exec
+# GFX89: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX89: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX89: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX89: %vcc = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+
+# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
+
+# GFX9: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
+# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}} = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+
+# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# VI: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 0, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
+
+
+name: vopc_instructions
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vreg_64 }
+  - { id: 2, class: sreg_64 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sreg_32_xm0 }
+  - { id: 6, class: sreg_32_xm0 }
+  - { id: 7, class: sreg_32_xm0 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vgpr_32 }
+  - { id: 11, class: vgpr_32 }
+  - { id: 12, class: vgpr_32 }
+  - { id: 13, class: vgpr_32 }
+  - { id: 14, class: vgpr_32 }
+  - { id: 15, class: vgpr_32 }
+  - { id: 16, class: vgpr_32 }
+  - { id: 17, class: vgpr_32 }
+  - { id: 18, class: sreg_64 }
+  - { id: 19, class: sreg_64 }
+  - { id: 20, class: vgpr_32 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vgpr_32 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vgpr_32 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 100, class: vgpr_32 }
+body: |
+  bb.0:
+    liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
+
+    %2 = COPY %sgpr30_sgpr31
+    %1 = COPY %vgpr2_vgpr3
+    %0 = COPY %vgpr0_vgpr1
+    %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+
+    %5 = S_MOV_B32 65535
+    %6 = S_MOV_B32 65535
+
+    %10 = V_AND_B32_e64 %5, %3, implicit %exec
+    V_CMP_EQ_F32_e32 123, killed %10, implicit-def %vcc, implicit %exec
+    %11 = V_AND_B32_e64 %5, %3, implicit %exec
+    V_CMPX_GT_F32_e32 123, killed %11, implicit-def %vcc, implicit-def %exec, implicit %exec
+    %12 = V_AND_B32_e64 %5, %3, implicit %exec
+    V_CMP_LT_I32_e32 123, killed %12, implicit-def %vcc, implicit %exec
+    %13 = V_AND_B32_e64 %5, %3, implicit %exec
+    V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec
+
+    %14 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
+    %15 = V_AND_B32_e64 %5, %3, implicit %exec
+    %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, 0, implicit-def %exec, implicit %exec
+    %16 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
+    %17 = V_AND_B32_e64 %5, %3, implicit %exec
+    %19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec
+
+    %20 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, 0, implicit %exec
+    %21 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, 2, implicit-def %exec, implicit %exec
+    %23 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, 2, implicit %exec
+    %24 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, 0, implicit-def %exec, implicit %exec
+    %25 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, 0, implicit-def %exec, implicit %exec
+    %26 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, 0, implicit-def %exec, implicit %exec
+    %27 = V_AND_B32_e64 %5, %3, implicit %exec
+    %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, 2, implicit-def %exec, implicit %exec
+
+
+    %100 = V_MOV_B32_e32 %vcc_lo, implicit %exec
+
+    FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+    %sgpr30_sgpr31 = COPY %2
+    S_SETPC_B64_return %sgpr30_sgpr31
Index: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
@@ -66,7 +66,7 @@
 }
 
 ; GCN-LABEL: {{^}}select_v4i8:
-; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32
 ; GCN-NOT: cndmask
 define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
   %cmp = icmp eq i8 %c, 0