Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -301,6 +301,7 @@ void SelectATOMIC_CMP_SWAP(SDNode *N); void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); + void SelectInterpP1F16(SDNode *N); void SelectINTRINSIC_W_CHAIN(SDNode *N); void SelectINTRINSIC_WO_CHAIN(SDNode *N); void SelectINTRINSIC_VOID(SDNode *N); @@ -2338,6 +2339,64 @@ CurDAG->setNodeMemRefs(cast(Selected), {MMO}); } +void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) { + if (Subtarget->getLDSBankCount() != 16) { + // This is a single instruction with a pattern. + SelectCode(N); + return; + } + + SDLoc DL(N); + + // This requires 2 instructions. It is possible to write a pattern to support + // this, but the generated isel emitter doesn't correctly deal with multiple + // output instructions using the same physical register input. The copy to m0 + // is incorrectly placed before the second instruction. + // + // TODO: Match source modifiers. + // + // def : Pat < + // (int_amdgcn_interp_p1_f16 + // (VOP3Mods f32:$src0, i32:$src0_modifiers), + // (i32 timm:$attrchan), (i32 timm:$attr), + // (i1 timm:$high), M0), + // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr, + // timm:$attrchan, 0, + // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> { + // let Predicates = [has16BankLDS]; + // } + + // 16 bank LDS + SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0, + N->getOperand(5), SDValue()); + + SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other); + + SDNode *InterpMov = + CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, { + CurDAG->getTargetConstant(2, DL, MVT::i32), // P0 + N->getOperand(3), // Attr + N->getOperand(2), // Attrchan + ToM0.getValue(1) // In glue + }); + + SDNode *InterpP1LV = + CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, { + CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers + N->getOperand(1), // Src0 + N->getOperand(3), // Attr + N->getOperand(2), // Attrchan + CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers + SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high + N->getOperand(4), // high + CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp + CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod + SDValue(InterpMov, 1) + }); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0)); +} + void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { unsigned IntrID = cast(N->getOperand(1))->getZExtValue(); switch (IntrID) { @@ -2366,6 +2425,9 @@ case Intrinsic::amdgcn_wwm: Opcode = AMDGPU::WWM; break; + case Intrinsic::amdgcn_interp_p1_f16: + SelectInterpP1F16(N); + return; default: SelectCode(N); return; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -474,9 +474,6 @@ BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, - INTERP_P1LL_F16, - INTERP_P1LV_F16, - INTERP_P2_F16, PC_ADD_REL_OFFSET, LDS, KILL, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4324,9 +4324,6 @@ NODE_NAME_CASE(KILL) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; - NODE_NAME_CASE(INTERP_P1LL_F16) - NODE_NAME_CASE(INTERP_P1LV_F16) - NODE_NAME_CASE(INTERP_P2_F16) NODE_NAME_CASE(LOAD_D16_HI) NODE_NAME_CASE(LOAD_D16_LO) NODE_NAME_CASE(LOAD_D16_HI_I8) Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -329,18 +329,6 @@ def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>; -def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16", - SDTypeProfile<1, 7, [SDTCisFP<0>]>, - [SDNPInGlue, SDNPOutGlue]>; - -def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16", - SDTypeProfile<1, 9, [SDTCisFP<0>]>, - [SDNPInGlue, SDNPOutGlue]>; - -def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16", - SDTypeProfile<1, 8, [SDTCisFP<0>]>, - [SDNPInGlue]>; - def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT, [SDNPHasChain, SDNPSideEffect]>; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5906,38 +5906,6 @@ } case Intrinsic::amdgcn_fdiv_fast: return lowerFDIV_FAST(Op, DAG); - case Intrinsic::amdgcn_interp_p1_f16: { - if (getSubtarget()->getLDSBankCount() == 16) { - // 16 bank LDS - SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, - Op.getOperand(5), SDValue()); - - // FIXME: This implicitly will insert a second CopyToReg to M0. - SDValue S = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32, - DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32), - DAG.getTargetConstant(2, DL, MVT::i32), // P0 - Op.getOperand(2), // Attrchan - Op.getOperand(3), // Attr - Op.getOperand(5)); // m0 - - SDValue Ops[] = { - Op.getOperand(1), // Src0 - Op.getOperand(2), // Attrchan - Op.getOperand(3), // Attr - DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers - S, // Src2 - holds two f16 values selected by high - DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers - Op.getOperand(4), // high - DAG.getTargetConstant(0, DL, MVT::i1), // $clamp - DAG.getTargetConstant(0, DL, MVT::i32), // $omod - ToM0.getValue(1) - }; - return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops); - } - - return SDValue(); - } case Intrinsic::amdgcn_sin: return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1)); Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -491,15 +491,7 @@ } -def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>, - [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan), - (i32 timm:$attr), - (i32 timm:$src0_modifiers), - (f32 VRegSrc_32:$src2), - (i32 timm:$src2_modifiers), - (i1 timm:$high), - (i1 timm:$clamp), - (i32 timm:$omod)))]>; +def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>; } // End Uses = [M0, EXEC], FPDPRounding = 1 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1