Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -135,6 +135,7 @@
   void SelectADD_SUB_I64(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
+  void SelectFMA(SDNode *N);

   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                    uint32_t Offset, uint32_t Width);
@@ -283,6 +284,14 @@
     SelectADD_SUB_I64(N);
     return;
   }
+  case AMDGPUISD::FMA: {
+    // SelectFMA() already morphs N into a machine node. Return here, as the
+    // SelectADD_SUB_I64 case above does; leaving the switch with 'break' hands
+    // N to the generated matcher, which asserts "Node already selected!".
+    SelectFMA(N);
+    return;
+  }
+
   case ISD::SCALAR_TO_VECTOR:
   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
   case ISD::BUILD_VECTOR: {
@@ -640,6 +649,23 @@
   CurDAG->RemoveDeadNode(N);
 }

+// Select AMDGPUISD::FMA (fma with an input and an output chain) to V_FMA_F32.
+// Operand 0 of N is the chain and operands 1-3 are the three sources; the chain
+// is appended after the eight VOP3 operands.
+void AMDGPUDAGToDAGISel::SelectFMA(SDNode *N) {
+  SDLoc SL(N);
+  // 0               1     2               3     4               5     6      7
+  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+  SDValue Ops[9];
+
+  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
+  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
+  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
+  Ops[8] = N->getOperand(0); // Chain.
+
+  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
+}
+
 // We need to handle this here because tablegen doesn't support matching
 // instructions with multiple outputs.
 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -222,6 +222,11 @@
   // This is SETCC with the full mask result which is used for a compare with a
   // result bit per item in the wavefront.
   SETCC,
+  SETREG,
+  // This FMA has an input and an output chain.
+  FMA,
+  // This FMUL has an input and an output chain.
+  FMUL,
   // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
   // Denormals handled on some parts.
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2780,6 +2780,9 @@
   NODE_NAME_CASE(DWORDADDR)
   NODE_NAME_CASE(FRACT)
   NODE_NAME_CASE(SETCC)
+  NODE_NAME_CASE(SETREG)
+  NODE_NAME_CASE(FMA)
+  NODE_NAME_CASE(FMUL)
   NODE_NAME_CASE(CLAMP)
   NODE_NAME_CASE(COS_HW)
   NODE_NAME_CASE(SIN_HW)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -150,6 +150,20 @@
 def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

+def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
+  SDTCisSameAs<0, 1>, SDTCisInt<0>
+]>;
+
+def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
+  SDNPHasChain, SDNPSideEffect]>;
+
+// fma and fmul variants that carry a chain (SDNPHasChain).
+def AMDGPUfma : SDNode<"AMDGPUISD::FMA", SDTFPTernaryOp, [
+  SDNPHasChain]>;
+
+def AMDGPUmul : SDNode<"AMDGPUISD::FMUL", SDTFPBinOp, [
+  SDNPHasChain]>;
+
 def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
   SDTIntToFPOp, []>;
 def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2751,7 +2751,7 @@
   return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
 }
-
+/*
 SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
     return FastLowered;
@@ -2767,25 +2767,84 @@
   SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
   SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);

+  SDValue CondDenominateor = DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, DenominatorScaled, DAG.getConstant(16, SL, MVT::i32));
+  SDValue Denominator_new = DAG.getNode(ISD::SELECT, SL, MVT::f32, CondDenominateor, DAG.getConstantFP(0, SL, MVT::f32), DenominatorScaled);
+
+  SDValue CondNumerator = DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, NumeratorScaled, DAG.getConstant(16, SL, MVT::i32));
+  SDValue Numerator_new = DAG.getNode(ISD::SELECT, SL, MVT::f32, CondNumerator, DAG.getConstantFP(0, SL, MVT::f32), NumeratorScaled);
+
   // Denominator is scaled to not be denormal, so using rcp is ok.
-  SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
+  SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, Denominator_new);

-  SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);
+  SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, Denominator_new);

   SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One);
   SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp);

-  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, NumeratorScaled, Fma1);
+  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, Numerator_new, Fma1);

-  SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled);
+  SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, Numerator_new);
   SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);
-  SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled);
+  SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, Numerator_new);

   SDValue Scale = NumeratorScaled.getValue(1);
   SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);

   return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
 }
+*/
+// Lower f32 fdiv to the DIV_SCALE / RCP / FMA refinement sequence. The FMA and
+// FMUL steps use the chained AMDGPUISD nodes, and their chain runs from the
+// first SETREG node to the second one.
+SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
+  if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
+    return FastLowered;
+
+  SDLoc SL(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+
+  SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
+
+  SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
+  SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);
+
+  // Denominator is scaled to not be denormal, so using rcp is ok.
+  SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
+  SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);
+
+  // NOTE(review): 8 is passed as the S_SETREG_B32 hwreg immediate and 1008/960
+  // as the values written; presumably these toggle f32 denormal support in the
+  // MODE register -- confirm the encodings against the ISA documentation.
+  const SDValue Index = DAG.getConstant(8, SL, MVT::i32);
+  const SDValue EnableDenormValue = DAG.getConstant(1008, SL, MVT::i32);
+  SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, DAG.getEntryNode(), EnableDenormValue, Index);
+
+  SDVTList FmaVT = DAG.getVTList(MVT::f32, MVT::Other);
+  SDValue Fma0 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, EnableDenorm, NegDivScale0, ApproxRcp, One);
+  SDValue Fma1 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma0.getValue(1), Fma0.getValue(0), ApproxRcp, ApproxRcp);
+
+  SDValue Mul = DAG.getNode(AMDGPUISD::FMUL, SL, FmaVT, Fma1.getValue(1), NumeratorScaled, Fma1.getValue(0));
+
+  SDValue Fma2 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Mul.getValue(1), NegDivScale0, Mul.getValue(0), NumeratorScaled);
+  SDValue Fma3 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma2.getValue(1), Fma2.getValue(0), Fma1.getValue(0), Mul.getValue(0));
+  SDValue Fma4 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma3.getValue(1), NegDivScale0, Fma3.getValue(0), NumeratorScaled);
+
+  const SDValue DisableDenormValue = DAG.getConstant(960, SL, MVT::i32);
+  SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, Fma4.getValue(1), DisableDenormValue, Index);
+  // Tie the chain ending in the second SETREG into the DAG root (presumably so
+  // the chained sequence stays reachable -- the result value does not use it).
+  SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, DisableDenorm, DAG.getRoot());
+  DAG.setRoot(OutputChain);
+
+  SDValue Scale = NumeratorScaled.getValue(1);
+  SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4.getValue(0), Fma1.getValue(0), Fma3.getValue(0), Scale);
+
+  return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
+}
+

 SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
   if (DAG.getTarget().Options.UnsafeFPMath)
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -590,11 +590,14 @@
 >;
 }

+// Mark the hardware register write as having side effects.
+let hasSideEffects = 1 in {
 def S_SETREG_B32 : SOPK_Pseudo <
   "s_setreg_b32",
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst"
 >;
+}

 // FIXME: Not on SI?
 //def S_GETREG_REGRD_B32 : SOPK_32 , "s_getreg_regrd_b32">;
@@ -872,6 +875,13 @@
 >;

 //===----------------------------------------------------------------------===//
+// S_SETREG_B32 Pattern.
+//===----------------------------------------------------------------------===//
+def : Pat <
+  (AMDGPUsetreg i32:$reg, i32:$simm16),
+  (S_SETREG_B32 $reg, (as_i16imm $simm16))
+>;
+//===----------------------------------------------------------------------===//
 // SOP1 Patterns
 //===----------------------------------------------------------------------===//

Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -219,9 +219,21 @@
 def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>;
 def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>;
 }
-
 } // End SubtargetPredicate = isVI

+// NOTE(review): AMDGPUISD::FMA is also selected manually by SelectFMA() in
+// AMDGPUISelDAGToDAG.cpp, so this pattern looks redundant -- confirm.
+def : Pat <
+  (AMDGPUfma f32:$src0, f32:$src1, f32:$src2),
+  (V_FMA_F32 0, $src0, 0, $src1, 0, $src2, 0, 0)
+>;
+
+def : Pat <
+  (AMDGPUmul (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+             (VOP3NoMods f32:$src1, i32:$src1_modifiers)),
+  (V_MUL_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1, $clamp, $omod)
+>;
+
 //===----------------------------------------------------------------------===//
 // Target
Index: test/CodeGen/AMDGPU/dump =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/dump @@ -0,0 +1,201 @@ + + + +=== fdiv_f32 +Initial selection DAG: BB#0 'fdiv_f32:entry' +SelectionDAG has 21 nodes: + t0: ch = EntryToken + t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0 + t4: i64 = add t2, Constant:i64<36> + t6: i64,ch = load t0, t4, undef:i64 + t7: i64,ch = merge_values t6, t6:1 + t9: i64 = add t2, Constant:i64<44> + t10: f32,ch = load t0, t9, undef:i64 + t11: f32,ch = merge_values t10, t10:1 + t13: i64 = add t2, Constant:i64<48> + t14: f32,ch = load t0, t13, undef:i64 + t15: f32,ch = merge_values t14, t14:1 + t18: i64 = Constant<0> + t16: ch = TokenFactor t7:1, t11:1, t15:1 + t17: f32 = fdiv t11, t15 + t19: ch = store t16, t17, t7, undef:i64 + t20: ch = ENDPGM t19 + + +Optimized lowered selection DAG: BB#0 'fdiv_f32:entry' +SelectionDAG has 17 nodes: + t0: ch = EntryToken + t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0 + t4: i64 = add t2, Constant:i64<36> + t6: i64,ch = load t0, t4, undef:i64 + t9: i64 = add t2, Constant:i64<44> + t10: f32,ch = load t0, t9, undef:i64 + t13: i64 = add t2, Constant:i64<48> + t14: f32,ch = load t0, t13, undef:i64 + t16: ch = TokenFactor t6:1, t10:1, t14:1 + t17: f32 = fdiv t10, t14 + t19: ch = store t16, t17, t6, undef:i64 + t20: ch = ENDPGM t19 + + +Type-legalized selection DAG: BB#0 'fdiv_f32:entry' +SelectionDAG has 17 nodes: + t0: ch = EntryToken + t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0 + t4: i64 = add t2, Constant:i64<36> + t6: i64,ch = load t0, t4, undef:i64 + t9: i64 = add t2, Constant:i64<44> + t10: f32,ch = load t0, t9, undef:i64 + t13: i64 = add t2, Constant:i64<48> + t14: f32,ch = load t0, t13, undef:i64 + t16: ch = TokenFactor t6:1, t10:1, t14:1 + t17: f32 = fdiv t10, t14 + t19: ch = store t16, t17, t6, undef:i64 + t20: ch = ENDPGM t19 + + +Legalized selection DAG: BB#0 'fdiv_f32:entry' +SelectionDAG has 39 nodes: + t0: ch = EntryToken + t2: i64,ch = 
CopyFromReg t0, Register:i64 %vreg0 + t24: f32,i1 = DIV_SCALE t47, t47, t45 + t25: f32,i1 = DIV_SCALE t45, t47, t45 + t26: f32 = RCP t24 + t27: f32 = fneg t24 + t30: ch = SETREG t0, Constant:i32<1008>, Constant:i32<8> + t31: f32,ch = FMA t30, t27, t26, ConstantFP:f32<1.000000e+00> + t32: f32,ch = FMA t31:1, t31, t26, t26 + t33: f32,ch = FMUL t32:1, t25, t32 + t34: f32,ch = FMA t33:1, t27, t33, t25 + t35: f32,ch = FMA t34:1, t34, t32, t33 + t36: f32,ch = FMA t35:1, t27, t35, t25 + t4: i64 = add t2, Constant:i64<36> + t42: v2i32,ch = load t0, t4, undef:i64 + t9: i64 = add t2, Constant:i64<44> + t44: i32,ch = load t0, t9, undef:i64 + t45: f32 = bitcast t44 + t13: i64 = add t2, Constant:i64<48> + t46: i32,ch = load t0, t13, undef:i64 + t47: f32 = bitcast t46 + t38: ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8> + t16: ch = TokenFactor t42:1, t44:1, t46:1 + t40: f32 = DIV_FMAS t36, t32, t35, t25:1 + t41: f32 = DIV_FIXUP t40, t47, t45 + t21: i32 = bitcast t41 + t43: i64 = bitcast t42 + t22: ch = store t16, t21, t43, undef:i64 + t20: ch = ENDPGM t22 + t39: ch = TokenFactor t38, t20 + + +Optimized legalized selection DAG: BB#0 'fdiv_f32:entry' +SelectionDAG has 39 nodes: + t0: ch = EntryToken + t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0 + t24: f32,i1 = DIV_SCALE t47, t47, t45 + t25: f32,i1 = DIV_SCALE t45, t47, t45 + t26: f32 = RCP t24 + t27: f32 = fneg t24 + t30: ch = SETREG t0, Constant:i32<1008>, Constant:i32<8> + t31: f32,ch = FMA t30, t27, t26, ConstantFP:f32<1.000000e+00> + t32: f32,ch = FMA t31:1, t31, t26, t26 + t33: f32,ch = FMUL t32:1, t25, t32 + t34: f32,ch = FMA t33:1, t27, t33, t25 + t35: f32,ch = FMA t34:1, t34, t32, t33 + t36: f32,ch = FMA t35:1, t27, t35, t25 + t4: i64 = add t2, Constant:i64<36> + t42: v2i32,ch = load t0, t4, undef:i64 + t9: i64 = add t2, Constant:i64<44> + t44: i32,ch = load t0, t9, undef:i64 + t45: f32 = bitcast t44 + t13: i64 = add t2, Constant:i64<48> + t46: i32,ch = load t0, t13, undef:i64 + t47: f32 = bitcast t46 + t38: 
ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8> + t16: ch = TokenFactor t42:1, t44:1, t46:1 + t40: f32 = DIV_FMAS t36, t32, t35, t25:1 + t41: f32 = DIV_FIXUP t40, t47, t45 + t21: i32 = bitcast t41 + t43: i64 = bitcast t42 + t22: ch = store t16, t21, t43, undef:i64 + t20: ch = ENDPGM t22 + t39: ch = TokenFactor t38, t20 + + +===== Instruction selection begins: BB#0 'entry' +ISEL: Starting pattern match on root node: t20: ch = ENDPGM t22 + + Morphed node: t20: ch = S_ENDPGM t22 + +ISEL: Match complete! +ISEL: Starting pattern match on root node: t22: ch = store t16, t21, t43, undef:i64 + + Initial Opcode index to 2446 + Match failed at index 2452 + Continuing at 2482 + Skipped scope entry (due to false predicate) at index 2495, continuing at 2547 + Match failed at index 2554 + Continuing at 2571 + Match failed at index 2572 + Continuing at 2622 + Morphed node: t22: ch = BUFFER_STORE_DWORD_OFFSET t21, t62, TargetConstant:i32<0>, TargetConstant:i16<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, t16 + +ISEL: Match complete! +ISEL: Starting pattern match on root node: t21: i32 = bitcast t41 + + Initial Opcode index to 71374 + Skipped scope entry (due to false predicate) at index 71383, continuing at 71388 +ISEL: Match complete! +ISEL: Starting pattern match on root node: t41: f32 = DIV_FIXUP t40, t47, t45 + + Initial Opcode index to 85298 + TypeSwitch[f32] from 85301 to 85304 + Morphed node: t41: f32 = V_DIV_FIXUP_F32 TargetConstant:i32<0>, t40, TargetConstant:i32<0>, t47, TargetConstant:i32<0>, t45, TargetConstant:i32<0>, TargetConstant:i32<0> + +ISEL: Match complete! +ISEL: Starting pattern match on root node: t38: ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8> + + Initial Opcode index to 75665 + Morphed node: t38: ch = S_SETREG_B32 Constant:i32<960>, TargetConstant:i16<8>, t36:1 + +ISEL: Match complete! 
+ISEL: Starting pattern match on root node: t40: f32 = DIV_FMAS t36, t32, t35, t25:1 + + Initial Opcode index to 85138 + TypeSwitch[f32] from 85144 to 85147 + Morphed node: t40: f32 = V_DIV_FMAS_F32 TargetConstant:i32<0>, t36, TargetConstant:i32<0>, t32, TargetConstant:i32<0>, t35, TargetConstant:i32<0>, TargetConstant:i32<0>, t65:1 + +ISEL: Match complete! +llc: /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:2851: void llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, const unsigned char*, unsigned int): Assertion `!NodeToMatch->isMachineOpcode() && "Node already selected!"' failed. +#0 0x000000000291693d llvm::sys::PrintStackTrace(llvm::raw_ostream&) /media/wdng/enable_ieeebit/llvm/lib/Support/Unix/Signals.inc:402:0 +#1 0x0000000002916c98 PrintStackTraceSignalHandler(void*) /media/wdng/enable_ieeebit/llvm/lib/Support/Unix/Signals.inc:466:0 +#2 0x0000000002914e64 llvm::sys::RunSignalHandlers() /media/wdng/enable_ieeebit/llvm/lib/Support/Signals.cpp:44:0 +#3 0x00000000029162ce SignalHandler(int) /media/wdng/enable_ieeebit/llvm/lib/Support/Unix/Signals.inc:256:0 +#4 0x00007ff867eb9330 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x10330) +#5 0x00007ff866ecdc37 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x36c37) +#6 0x00007ff866ed1028 abort (/lib/x86_64-linux-gnu/libc.so.6+0x3a028) +#7 0x00007ff866ec6bf6 (/lib/x86_64-linux-gnu/libc.so.6+0x2fbf6) +#8 0x00007ff866ec6ca2 (/lib/x86_64-linux-gnu/libc.so.6+0x2fca2) +#9 0x00000000027af889 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:2854:0 +#10 0x00000000012755ce (anonymous namespace)::AMDGPUDAGToDAGISel::SelectCode(llvm::SDNode*) /media/wdng/enable_ieeebit/llvm/build/lib/Target/AMDGPU/AMDGPUGenDAGISel.inc:38649:0 +#11 0x000000000127dfab (anonymous namespace)::AMDGPUDAGToDAGISel::Select(llvm::SDNode*) 
/media/wdng/enable_ieeebit/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp:489:0 +#12 0x00000000027a6dd6 llvm::SelectionDAGISel::DoInstructionSelection() /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:948:0 +#13 0x00000000027a66dd llvm::SelectionDAGISel::CodeGenAndEmitDAG() /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:855:0 +#14 0x00000000027a5340 llvm::SelectionDAGISel::SelectBasicBlock(llvm::ilist_iterator, false, true>, llvm::ilist_iterator, false, true>, bool&) /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:684:0 +#15 0x00000000027a9822 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1554:0 +#16 0x00000000027a4207 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) /media/wdng/enable_ieeebit/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:509:0 +#17 0x000000000127c030 (anonymous namespace)::AMDGPUDAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) /media/wdng/enable_ieeebit/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp:163:0 +#18 0x00000000020bc5fb llvm::MachineFunctionPass::runOnFunction(llvm::Function&) /media/wdng/enable_ieeebit/llvm/lib/CodeGen/MachineFunctionPass.cpp:62:0 +#19 0x000000000242c294 llvm::FPPassManager::runOnFunction(llvm::Function&) /media/wdng/enable_ieeebit/llvm/lib/IR/LegacyPassManager.cpp:1510:0 +#20 0x000000000242c427 llvm::FPPassManager::runOnModule(llvm::Module&) /media/wdng/enable_ieeebit/llvm/lib/IR/LegacyPassManager.cpp:1531:0 +#21 0x000000000242c7c2 (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&) /media/wdng/enable_ieeebit/llvm/lib/IR/LegacyPassManager.cpp:1587:0 +#22 0x000000000242cf12 llvm::legacy::PassManagerImpl::run(llvm::Module&) /media/wdng/enable_ieeebit/llvm/lib/IR/LegacyPassManager.cpp:1690:0 +#23 0x000000000242d153 llvm::legacy::PassManager::run(llvm::Module&) 
/media/wdng/enable_ieeebit/llvm/lib/IR/LegacyPassManager.cpp:1722:0 +#24 0x0000000000f36441 compileModule(char**, llvm::LLVMContext&) /media/wdng/enable_ieeebit/llvm/tools/llc/llc.cpp:526:0 +#25 0x0000000000f34b25 main /media/wdng/enable_ieeebit/llvm/tools/llc/llc.cpp:282:0 +#26 0x00007ff866eb8f45 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21f45) +#27 0x0000000000f32e29 _start (/media/wdng/enable_ieeebit/llvm/build/./bin/llc+0xf32e29) +Stack dump: +0. Program arguments: /media/wdng/enable_ieeebit/llvm/build/./bin/llc -march=amdgcn -verify-machineinstrs -debug-only=isel +1. Running pass 'Function Pass Manager' on module ''. +2. Running pass 'AMDGPU DAG->DAG Pattern Instruction Selection' on function '@fdiv_f32'