Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -278,7 +278,6 @@ void SelectAddcSubb(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); - void SelectDIV_FMAS(SDNode *N); void SelectMAD_64_32(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); @@ -871,10 +870,6 @@ SelectDIV_SCALE(N); return; } - case AMDGPUISD::DIV_FMAS: { - SelectDIV_FMAS(N); - return; - } case AMDGPUISD::MAD_I64_I32: case AMDGPUISD::MAD_U64_U32: { SelectMAD_64_32(N); @@ -1128,35 +1123,6 @@ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } -void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) { - const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget); - const SIRegisterInfo *TRI = ST->getRegisterInfo(); - - SDLoc SL(N); - EVT VT = N->getValueType(0); - - assert(VT == MVT::f32 || VT == MVT::f64); - - unsigned Opc - = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32; - - SDValue CarryIn = N->getOperand(3); - // V_DIV_FMAS implicitly reads VCC. - SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL, - TRI->getVCC(), CarryIn, SDValue()); - - SDValue Ops[10]; - - SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); - SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); - SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); - - Ops[8] = VCC; - Ops[9] = VCC.getValue(1); - - CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); -} - // We need to handle this here because tablegen doesn't support matching // instructions with multiple outputs. 
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -716,6 +716,24 @@ V_PERMLANEX16_B32>; } // End SubtargetPredicate = isGFX10Plus +class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat< + (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), + (VOP3Mods vt:$src1, i32:$src1_modifiers), + (VOP3Mods vt:$src2, i32:$src2_modifiers), + (i1 CondReg)), + (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2) +>; + +let WaveSizePredicate = isWave64 in { +def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC>; +def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC>; +} + +let WaveSizePredicate = isWave32 in { +def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC_LO>; +def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC_LO>; +} + //===----------------------------------------------------------------------===// // Integer Clamp Patterns //===----------------------------------------------------------------------===//