Diff 77265

lib/Target/AMDGPU/AMDGPUISelLowering.h

Context not available.
	// This is SETCC with the full mask result which is used for a compare with a	// This is SETCC with the full mask result which is used for a compare with a
	// result bit per item in the wavefront.	// result bit per item in the wavefront.
	SETCC,	SETCC,
		SETREG,
		// This FMA has input and output chain
		FMA,
		// This MUL has input and output chain
		FMUL,
		arsenm (Unsubmitted, Done): These should be FMA_W_CHAIN, FMUL_W_CHAIN

	// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.	// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
	// Denormals handled on some parts.	// Denormals handled on some parts.
Context not available.
		arsenm (Unsubmitted, Done): I would only put the comment once for the block of the 2 instructions

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Context not available.
	NODE_NAME_CASE(DWORDADDR)	NODE_NAME_CASE(DWORDADDR)
	NODE_NAME_CASE(FRACT)	NODE_NAME_CASE(FRACT)
	NODE_NAME_CASE(SETCC)	NODE_NAME_CASE(SETCC)
		NODE_NAME_CASE(SETREG)
		NODE_NAME_CASE(FMA)
		NODE_NAME_CASE(FMUL)
	NODE_NAME_CASE(CLAMP)	NODE_NAME_CASE(CLAMP)
	NODE_NAME_CASE(COS_HW)	NODE_NAME_CASE(COS_HW)
	NODE_NAME_CASE(SIN_HW)	NODE_NAME_CASE(SIN_HW)
Context not available.

lib/Target/AMDGPU/AMDGPUInstrInfo.td

Context not available.

	def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;	def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

		// Type profile for the SETREG node: zero results and two operands, where
		// operand 1 has the same type as operand 0 and operand 0 is an integer type.
		def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
		SDTCisSameAs<0, 1>, SDTCisInt<0>
		]>;

		// DAG node for AMDGPUISD::SETREG using the AMDGPUSetRegOp profile. The node
		// carries a chain and is marked as having side effects.
		def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
		SDNPHasChain, SDNPSideEffect]>;

		// DAG node for AMDGPUISD::FMA: a floating-point ternary operation that
		// additionally carries a chain.
		def AMDGPUfma : SDNode<"AMDGPUISD::FMA", SDTFPTernaryOp, [
		SDNPHasChain]>;

		// DAG node for AMDGPUISD::FMUL: a floating-point binary operation that
		// additionally carries a chain.
		def AMDGPUmul : SDNode<"AMDGPUISD::FMUL", SDTFPBinOp, [
		SDNPHasChain]>;

	def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",	def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
	SDTIntToFPOp, []>;	SDTIntToFPOp, []>;
	def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",	def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
Context not available.

lib/Target/AMDGPU/SIISelLowering.cpp

Context not available.

	return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);	return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
	}	}
		/*
	SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {	SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
	if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))	if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
	return FastLowered;	return FastLowered;
Context not available.
	SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);	SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
	SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);	SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);

		SDValue CondDenominateor = DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, DenominatorScaled, DAG.getConstant(16, SL, MVT::i32));
		SDValue Denominator_new = DAG.getNode(ISD::SELECT, SL, MVT::f32, CondDenominateor, DAG.getConstantFP(0, SL, MVT::f32), DenominatorScaled);

		SDValue CondNumerator = DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, NumeratorScaled, DAG.getConstant(16, SL, MVT::i32));
		SDValue Numerator_new = DAG.getNode(ISD::SELECT, SL, MVT::f32, CondNumerator, DAG.getConstantFP(0, SL, MVT::f32), NumeratorScaled);

		arsenm (Unsubmitted, Done): The constant should be a bitmask formed from the enums for the fields you are setting rather than the magic numbers
	// Denominator is scaled to not be denormal, so using rcp is ok.	// Denominator is scaled to not be denormal, so using rcp is ok.
	SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);	SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, Denominator_new);

	SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);	SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, Denominator_new);

		arsenm (Unsubmitted, Done): These lines go over 80 columns
		tstellarAMD (Author, Unsubmitted, Not Done): These magic numbers still need to be replaced with enum values.
	SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One);	SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One);
	SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp);	SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp);

	SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, NumeratorScaled, Fma1);	SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, Numerator_new, Fma1);

	SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled);	SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, Numerator_new);
	SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);	SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);
	SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled);	SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, Numerator_new);

	SDValue Scale = NumeratorScaled.getValue(1);	SDValue Scale = NumeratorScaled.getValue(1);
	SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);	SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);

	return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);	return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
	}	}
		*/
		// Lowers an f32 FDIV node. The fast/unsafe lowering is tried first; when it
		// produces nothing, the division is expanded into DIV_SCALE, RCP, a chained
		// FMA/FMUL refinement sequence bracketed by two SETREG nodes, DIV_FMAS and
		// finally DIV_FIXUP.
		//
		// Op  - the FDIV node; operand 0 is used as the numerator (LHS) and
		//       operand 1 as the denominator (RHS).
		// DAG - the SelectionDAG being built; its root is replaced by this function.
		// Returns the f32 DIV_FIXUP node that stands in for Op.
		SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
		if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
		return FastLowered;

		SDLoc SL(Op);
		SDValue LHS = Op.getOperand(0);
		SDValue RHS = Op.getOperand(1);

		const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);

		// DIV_SCALE yields two results: the scaled f32 value and an i1 flag. The
		// flag of the numerator scale is consumed by DIV_FMAS below.
		SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);

		SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
		SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);

		// Denominator is scaled to not be denormal, so using rcp is ok.
		SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
		SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);

		// NOTE(review): 8, 1008 and 960 are magic numbers. Judging by the variable
		// names they select a denormal-mode field and the values that enable and
		// disable it -- confirm the encoding and replace with named enum values.
		// The first SETREG is chained to the DAG entry node.
		const SDValue Index = DAG.getConstant(8, SL, MVT::i32);
		const SDValue EnableDenormValue = DAG.getConstant(1008, SL, MVT::i32);
		SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, DAG.getEntryNode(), EnableDenormValue, Index);

		// Each target FMA/FMUL node below returns (f32 value, chain). The chain is
		// threaded EnableDenorm -> Fma0 -> Fma1 -> Mul -> Fma2 -> Fma3 -> Fma4 and
		// then into the second SETREG.
		// NOTE(review): the ISel dump accompanying this change shows the second
		// S_SETREG_B32 chained directly to the first after selection -- verify
		// that this ordering actually survives instruction selection.
		SDVTList FmaVT = DAG.getVTList(MVT::f32, MVT::Other);
		SDValue Fma0 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, EnableDenorm,NegDivScale0, ApproxRcp, One);
		SDValue Fma1 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma0.getValue(1), Fma0.getValue(0), ApproxRcp, ApproxRcp);

		SDValue Mul = DAG.getNode(AMDGPUISD::FMUL, SL, FmaVT, Fma1.getValue(1), NumeratorScaled, Fma1.getValue(0));
		arsenmUnsubmitted Done Reply Inline Actions You don't need any of the getValue(0)s arsenm: You don't need any of the getValue(0)s

		SDValue Fma2 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Mul.getValue(1), NegDivScale0, Mul.getValue(0), NumeratorScaled);
		tstellarAMDAuthorUnsubmitted Not Done Reply Inline Actions Another magic number here. tstellarAMD: Another magic number here.
		SDValue Fma3 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma2.getValue(1), Fma2.getValue(0), Fma1.getValue(0), Mul.getValue(0));
		SDValue Fma4 = DAG.getNode(AMDGPUISD::FMA, SL, FmaVT, Fma3.getValue(1), NegDivScale0, Fma3.getValue(0), NumeratorScaled);

		// Second SETREG, chained after the last FMA. Its chain is merged with the
		// current DAG root through a TokenFactor, which then becomes the new root
		// (presumably so the chained sequence stays reachable from the root).
		const SDValue DisableDenormValue = DAG.getConstant(960, SL, MVT::i32);
		SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, Fma4.getValue(1), DisableDenormValue, Index);
		SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, DisableDenorm, DAG.getRoot());
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions The indentation here and in the rest of the block looks wrong. tstellarAMD: The indentation here and in the rest of the block looks wrong.
		DAG.setRoot(OutputChain);

		// Result 1 of the numerator DIV_SCALE is the i1 flag passed to DIV_FMAS.
		SDValue Scale = NumeratorScaled.getValue(1);
		SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4.getValue(0), Fma1.getValue(0), Fma3.getValue(0), Scale);

		// DIV_FIXUP receives the DIV_FMAS result together with the original,
		// unscaled denominator and numerator.
		return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
		}


	SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {	SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
	if (DAG.getTarget().Options.UnsafeFPMath)	if (DAG.getTarget().Options.UnsafeFPMath)
Context not available.
		tstellarAMD (Author, Unsubmitted, Done): Extra whitespace change.
		arsenm (Unsubmitted, Not Done): We should probably not use a target constant here, and instead teach FoldImmediate to turn the register setreg into the immediate setreg, to save code size on multiple uses of the immediate, as will happen in the unrolled vector case

lib/Target/AMDGPU/SOPInstructions.td

Context not available.
	>;	>;
	}	}

		let hasSideEffects = 1, isBarrier = 1 in {
		arsenm (Unsubmitted, Not Done): This shouldn't have isBarrier set
	def S_SETREG_B32 : SOPK_Pseudo <	def S_SETREG_B32 : SOPK_Pseudo <
	"s_setreg_b32",	"s_setreg_b32",
	(outs), (ins SReg_32:$sdst, hwreg:$simm16),	(outs), (ins SReg_32:$sdst, hwreg:$simm16),
	"$simm16, $sdst"	"$simm16, $sdst"
	>;	>;
		arsenm (Unsubmitted, Done): You can move the hasSideEffects here instead of the let block since it's just the one instruction
		}

	// FIXME: Not on SI?	// FIXME: Not on SI?
	//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;	//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
Context not available.
	>;	>;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
		// S_SETREG_B32 Pattern.
		//===----------------------------------------------------------------------===//
		// Selects an AMDGPUsetreg node with two i32 operands to S_SETREG_B32. The
		// first operand is passed through as $reg; the second is converted with
		// as_i16imm before being used as the instruction's second operand.
		def : Pat <
		(AMDGPUsetreg i32:$reg, i32:$simm16),
		(S_SETREG_B32 $reg, (as_i16imm $simm16))
		>;
		//===----------------------------------------------------------------------===//
	// SOP1 Patterns	// SOP1 Patterns
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

Context not available.

lib/Target/AMDGPU/VOP3Instructions.td

Context not available.
	def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;	def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
	def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;	def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
	}	}

	} // End SubtargetPredicate = isVI	} // End SubtargetPredicate = isVI

		// Selects AMDGPUfma on three f32 operands to V_FMA_F32. The literal 0
		// operands are hard-coded rather than matched from the DAG -- presumably
		// the per-source modifier, clamp and omod fields; confirm against the
		// V_FMA_F32 operand list.
		def : Pat <
		(AMDGPUfma f32:$src0, f32:$src1, f32:$src2),
		(V_FMA_F32 0, $src0, 0, $src1, 0, $src2, 0, 0)
		arsenmUnsubmitted Done Reply Inline Actions There should be a pattern which uses the complex pattern for the source modifiers arsenm: There should be a pattern which uses the complex pattern for the source modifiers
		>;

		// Selects AMDGPUmul on two f32 operands to V_MUL_F32_e64. The modifier,
		// clamp and omod operands of the instruction come from the VOP3NoMods0 and
		// VOP3NoMods complex patterns that wrap the two sources.
		def : Pat <
		(AMDGPUmul (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
		(VOP3NoMods f32:$src1, i32:$src1_modifiers)),
		(V_MUL_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1, $clamp, $omod)
		>;


	tstellarAMD (Author, Unsubmitted, Done): Extra whitespace changes.
		tstellarAMD (Author, Unsubmitted, Done): These are dead patterns since we are custom selecting the SDNodes.
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	// Target	// Target
Context not available.

test/CodeGen/AMDGPU/dump

This file was added.




				=== fdiv_f32
				Initial selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 21 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t4: i64 = add t2, Constant:i64<36>
				t6: i64,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64
				t7: i64,ch = merge_values t6, t6:1
				t9: i64 = add t2, Constant:i64<44>
				t10: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64
				t11: f32,ch = merge_values t10, t10:1
				t13: i64 = add t2, Constant:i64<48>
				t14: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64
				t15: f32,ch = merge_values t14, t14:1
				t18: i64 = Constant<0>
				t16: ch = TokenFactor t7:1, t11:1, t15:1
				t17: f32 = fdiv t11, t15
				t19: ch = store<ST4[%out(addrspace=1)]> t16, t17, t7, undef:i64
				t20: ch = ENDPGM t19


				Optimized lowered selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 17 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t4: i64 = add t2, Constant:i64<36>
				t6: i64,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64
				t9: i64 = add t2, Constant:i64<44>
				t10: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64
				t13: i64 = add t2, Constant:i64<48>
				t14: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64
				t16: ch = TokenFactor t6:1, t10:1, t14:1
				t17: f32 = fdiv t10, t14
				t19: ch = store<ST4[%out(addrspace=1)]> t16, t17, t6, undef:i64
				t20: ch = ENDPGM t19


				Type-legalized selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 17 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t4: i64 = add t2, Constant:i64<36>
				t6: i64,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64
				t9: i64 = add t2, Constant:i64<44>
				t10: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64
				t13: i64 = add t2, Constant:i64<48>
				t14: f32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64
				t16: ch = TokenFactor t6:1, t10:1, t14:1
				t17: f32 = fdiv t10, t14
				t19: ch = store<ST4[%out(addrspace=1)]> t16, t17, t6, undef:i64
				t20: ch = ENDPGM t19


				Legalized selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 39 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t24: f32,i1 = DIV_SCALE t47, t47, t45
				t25: f32,i1 = DIV_SCALE t45, t47, t45
				t26: f32 = RCP t24
				t27: f32 = fneg t24
				t30: ch = SETREG t0, Constant:i32<1008>, Constant:i32<8>
				t31: f32,ch = FMA t30, t27, t26, ConstantFP:f32<1.000000e+00>
				t32: f32,ch = FMA t31:1, t31, t26, t26
				t33: f32,ch = FMUL t32:1, t25, t32
				t34: f32,ch = FMA t33:1, t27, t33, t25
				t35: f32,ch = FMA t34:1, t34, t32, t33
				t36: f32,ch = FMA t35:1, t27, t35, t25
				t4: i64 = add t2, Constant:i64<36>
				t42: v2i32,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64
				t9: i64 = add t2, Constant:i64<44>
				t44: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64
				t45: f32 = bitcast t44
				t13: i64 = add t2, Constant:i64<48>
				t46: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64
				t47: f32 = bitcast t46
				t38: ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8>
				t16: ch = TokenFactor t42:1, t44:1, t46:1
				t40: f32 = DIV_FMAS t36, t32, t35, t25:1
				t41: f32 = DIV_FIXUP t40, t47, t45
				t21: i32 = bitcast t41
				t43: i64 = bitcast t42
				t22: ch = store<ST4[%out(addrspace=1)]> t16, t21, t43, undef:i64
				t20: ch = ENDPGM t22
				t39: ch = TokenFactor t38, t20


				Optimized legalized selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 39 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t24: f32,i1 = DIV_SCALE t47, t47, t45
				t25: f32,i1 = DIV_SCALE t45, t47, t45
				t26: f32 = RCP t24
				t27: f32 = fneg t24
				t30: ch = SETREG t0, Constant:i32<1008>, Constant:i32<8>
				t31: f32,ch = FMA t30, t27, t26, ConstantFP:f32<1.000000e+00>
				t32: f32,ch = FMA t31:1, t31, t26, t26
				t33: f32,ch = FMUL t32:1, t25, t32
				t34: f32,ch = FMA t33:1, t27, t33, t25
				t35: f32,ch = FMA t34:1, t34, t32, t33
				t36: f32,ch = FMA t35:1, t27, t35, t25
				t4: i64 = add t2, Constant:i64<36>
				t42: v2i32,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64
				t9: i64 = add t2, Constant:i64<44>
				t44: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64
				t45: f32 = bitcast t44
				t13: i64 = add t2, Constant:i64<48>
				t46: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64
				t47: f32 = bitcast t46
				t38: ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8>
				t16: ch = TokenFactor t42:1, t44:1, t46:1
				t40: f32 = DIV_FMAS t36, t32, t35, t25:1
				t41: f32 = DIV_FIXUP t40, t47, t45
				t21: i32 = bitcast t41
				t43: i64 = bitcast t42
				t22: ch = store<ST4[%out(addrspace=1)]> t16, t21, t43, undef:i64
				t20: ch = ENDPGM t22
				t39: ch = TokenFactor t38, t20


				===== Instruction selection begins: BB#0 'entry'
				ISEL: Starting pattern match on root node: t20: ch = ENDPGM t22

				Morphed node: t20: ch = S_ENDPGM t22

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t22: ch = store<ST4[%out(addrspace=1)]> t16, t21, t43, undef:i64

				Initial Opcode index to 2446
				Match failed at index 2452
				Continuing at 2482
				Skipped scope entry (due to false predicate) at index 2495, continuing at 2547
				Match failed at index 2554
				Continuing at 2571
				Match failed at index 2572
				Continuing at 2622
				Morphed node: t22: ch = BUFFER_STORE_DWORD_OFFSET<Mem:ST4[%out(addrspace=1)]> t21, t62, TargetConstant:i32<0>, TargetConstant:i16<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, t16

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t21: i32 = bitcast t41

				Initial Opcode index to 71374
				Skipped scope entry (due to false predicate) at index 71383, continuing at 71388
				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t41: f32 = DIV_FIXUP t40, t47, t45

				Initial Opcode index to 85298
				TypeSwitch[f32] from 85301 to 85304
				Morphed node: t41: f32 = V_DIV_FIXUP_F32 TargetConstant:i32<0>, t40, TargetConstant:i32<0>, t47, TargetConstant:i32<0>, t45, TargetConstant:i32<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t38: ch = SETREG t36:1, Constant:i32<960>, Constant:i32<8>

				Initial Opcode index to 75665
				Morphed node: t38: ch = S_SETREG_B32 Constant:i32<960>, TargetConstant:i16<8>, t36:1

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t40: f32 = DIV_FMAS t36, t32, t35, t25:1

				Initial Opcode index to 85138
				TypeSwitch[f32] from 85144 to 85147
				Morphed node: t40: f32 = V_DIV_FMAS_F32 TargetConstant:i32<0>, t36, TargetConstant:i32<0>, t32, TargetConstant:i32<0>, t35, TargetConstant:i32<0>, TargetConstant:i32<0>, t65:1

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t36: f32,ch = FMA t35:1, t27, t35, t25

				Initial Opcode index to 82998
				Created node: t66: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t35, TargetConstant:i32<0>, t25, TargetConstant:i1<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t35: f32,ch = FMA t34:1, t34, t32, t33

				Initial Opcode index to 82998
				Created node: t67: f32 = V_FMA_F32 TargetConstant:i32<0>, t34, TargetConstant:i32<0>, t32, TargetConstant:i32<0>, t33, TargetConstant:i1<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t34: f32,ch = FMA t33:1, t27, t33, t25

				Initial Opcode index to 82998
				Created node: t68: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t33, TargetConstant:i32<0>, t25, TargetConstant:i1<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t33: f32,ch = FMUL t32:1, t25, t32

				Initial Opcode index to 76951
				Created node: t69: f32 = V_MUL_F32_e64 TargetConstant:i32<0>, t25, TargetConstant:i32<0>, t32, TargetConstant:i32<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t32: f32,ch = FMA t31:1, t31, t26, t26

				Initial Opcode index to 82998
				Created node: t70: f32 = V_FMA_F32 TargetConstant:i32<0>, t31, TargetConstant:i32<0>, t26, TargetConstant:i32<0>, t26, TargetConstant:i1<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t31: f32,ch = FMA t30, t27, t26, ConstantFP:f32<1.000000e+00>

				Initial Opcode index to 82998
				Created node: t71: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t26, TargetConstant:i32<0>, ConstantFP:f32<1.000000e+00>, TargetConstant:i1<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t26: f32 = RCP t24

				Initial Opcode index to 78468
				Match failed at index 78472
				Continuing at 78706
				TypeSwitch[f32] from 78709 to 78713
				Skipped scope entry (due to false predicate) at index 78715, continuing at 78781
				Skipped scope entry (due to false predicate) at index 78782, continuing at 78848
				Skipped scope entry (due to false predicate) at index 78849, continuing at 78915
				Morphed node: t26: f32 = V_RCP_F32_e64 TargetConstant:i32<0>, t24, TargetConstant:i32<0>, TargetConstant:i32<0>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t27: f32 = fneg t24

				Initial Opcode index to 78951
				Match failed at index 78954
				Continuing at 79056
				TypeSwitch[f32] from 79058 to 79061
				Created node: t73: i32 = V_MOV_B32_e32 TargetConstant:i32<-2147483648>

				Morphed node: t27: f32 = V_XOR_B32_e32 t24, t73

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t43: i64 = bitcast t42

				Initial Opcode index to 71374
				Skipped scope entry (due to false predicate) at index 71377, continuing at 71402
				Skipped scope entry (due to false predicate) at index 71403, continuing at 71428
				TypeSwitch[i64] from 71431 to 71434
				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t45: f32 = bitcast t44

				Initial Opcode index to 71374
				Skipped scope entry (due to false predicate) at index 71377, continuing at 71402
				Skipped scope entry (due to false predicate) at index 71403, continuing at 71428
				Skipped scope entry (due to false predicate) at index 71429, continuing at 71463
				Skipped scope entry (due to false predicate) at index 71464, continuing at 71498
				Skipped scope entry (due to false predicate) at index 71505, continuing at 71510
				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t47: f32 = bitcast t46

				Initial Opcode index to 71374
				Skipped scope entry (due to false predicate) at index 71377, continuing at 71402
				Skipped scope entry (due to false predicate) at index 71403, continuing at 71428
				Skipped scope entry (due to false predicate) at index 71429, continuing at 71463
				Skipped scope entry (due to false predicate) at index 71464, continuing at 71498
				Skipped scope entry (due to false predicate) at index 71505, continuing at 71510
				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t42: v2i32,ch = load<LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t4, undef:i64

				Initial Opcode index to 5
				Match failed at index 19
				Continuing at 69
				Match failed at index 70
				Continuing at 97
				Match failed at index 98
				Continuing at 125
				Match failed at index 126
				Continuing at 153
				Match failed at index 154
				Continuing at 181
				Match failed at index 186
				Continuing at 230
				Match failed at index 231
				Continuing at 258
				Match failed at index 259
				Continuing at 286
				Match failed at index 287
				Continuing at 314
				Match failed at index 315
				Continuing at 368
				Match failed at index 369
				Continuing at 395
				Match failed at index 396
				Continuing at 422
				Match failed at index 423
				Continuing at 449
				Match failed at index 454
				Continuing at 496
				Match failed at index 497
				Continuing at 533
				Match failed at index 534
				Continuing at 570
				Match failed at index 571
				Continuing at 607
				Match failed at index 608
				Continuing at 644
				Match failed at index 647
				Continuing at 679
				Match failed at index 681
				Continuing at 1131
				Continuing at 1132
				Match failed at index 1136
				Continuing at 1270
				Match failed at index 1280
				Continuing at 1427
				Match failed at index 1429
				Continuing at 1665
				TypeSwitch[v2i32] from 1673 to 1676
				Morphed node: t42: v2i32,ch = S_LOAD_DWORDX2_IMM<Mem:LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<9>, t0

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t44: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t9, undef:i64

				Initial Opcode index to 5
				Morphed node: t44: i32,ch = S_LOAD_DWORD_IMM<Mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<11>, t0

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t46: i32,ch = load<LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t0, t13, undef:i64

				Initial Opcode index to 5
				Morphed node: t46: i32,ch = S_LOAD_DWORD_IMM<Mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<12>, t0

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t30: ch = SETREG t0, Constant:i32<1008>, Constant:i32<8>

				Initial Opcode index to 75665
				Morphed node: t30: ch = S_SETREG_B32 Constant:i32<1008>, TargetConstant:i16<8>, t0

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t37: i32 = Constant<960>

				Initial Opcode index to 71293
				TypeSwitch[i32] from 71294 to 71297
				Morphed node: t37: i32 = S_MOV_B32 TargetConstant:i32<960>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t29: i32 = Constant<1008>

				Initial Opcode index to 71293
				TypeSwitch[i32] from 71294 to 71297
				Morphed node: t29: i32 = S_MOV_B32 TargetConstant:i32<1008>

				ISEL: Match complete!
				ISEL: Starting pattern match on root node: t23: f32 = ConstantFP<1.000000e+00>

				Initial Opcode index to 79171
				TypeSwitch[f32] from 79172 to 79175
				Skipped scope entry (due to false predicate) at index 79177, continuing at 79191
				Morphed node: t23: f32 = V_MOV_B32_e32 TargetConstant:i32<1065353216>

				ISEL: Match complete!
				===== Instruction selection ends:
				Selected selection DAG: BB#0 'fdiv_f32:entry'
				SelectionDAG has 53 nodes:
				t0: ch = EntryToken
				t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
				t46: i32,ch = S_LOAD_DWORD_IMM<Mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<12>, t0
				t44: i32,ch = S_LOAD_DWORD_IMM<Mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<11>, t0
				t42: v2i32,ch = S_LOAD_DWORDX2_IMM<Mem:LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant)> t2, TargetConstant:i32<9>, t0
				t24: f32,i1 = V_DIV_SCALE_F32 TargetConstant:i32<0>, t46, TargetConstant:i32<0>, t46, TargetConstant:i32<0>, t44, TargetConstant:i32<0>, TargetConstant:i32<0>
				t25: f32,i1 = V_DIV_SCALE_F32 TargetConstant:i32<0>, t44, TargetConstant:i32<0>, t46, TargetConstant:i32<0>, t44, TargetConstant:i32<0>, TargetConstant:i32<0>
				t73: i32 = V_MOV_B32_e32 TargetConstant:i32<-2147483648>
				t27: f32 = V_XOR_B32_e32 t24, t73
				t26: f32 = V_RCP_F32_e64 TargetConstant:i32<0>, t24, TargetConstant:i32<0>, TargetConstant:i32<0>
				t68: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t69, TargetConstant:i32<0>, t25, TargetConstant:i1<0>, TargetConstant:i32<0>
				t67: f32 = V_FMA_F32 TargetConstant:i32<0>, t68, TargetConstant:i32<0>, t70, TargetConstant:i32<0>, t69, TargetConstant:i1<0>, TargetConstant:i32<0>
				t69: f32 = V_MUL_F32_e64 TargetConstant:i32<0>, t25, TargetConstant:i32<0>, t70, TargetConstant:i32<0>, TargetConstant:i32<0>
				t23: f32 = V_MOV_B32_e32 TargetConstant:i32<1065353216>
				t71: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t26, TargetConstant:i32<0>, t23, TargetConstant:i1<0>, TargetConstant:i32<0>
				t70: f32 = V_FMA_F32 TargetConstant:i32<0>, t71, TargetConstant:i32<0>, t26, TargetConstant:i32<0>, t26, TargetConstant:i1<0>, TargetConstant:i32<0>
				t37: i32 = S_MOV_B32 TargetConstant:i32<960>
				t29: i32 = S_MOV_B32 TargetConstant:i32<1008>
				t30: ch = S_SETREG_B32 t29, TargetConstant:i16<8>, t0
				t38: ch = S_SETREG_B32 t37, TargetConstant:i16<8>, t30
				t66: f32 = V_FMA_F32 TargetConstant:i32<0>, t27, TargetConstant:i32<0>, t67, TargetConstant:i32<0>, t25, TargetConstant:i1<0>, TargetConstant:i32<0>
				t65: ch,glue = CopyToReg t0, Register:i1 %VCC, t25:1
				t40: f32 = V_DIV_FMAS_F32 TargetConstant:i32<0>, t66, TargetConstant:i32<0>, t70, TargetConstant:i32<0>, t67, TargetConstant:i32<0>, TargetConstant:i32<0>, t65:1
				t41: f32 = V_DIV_FIXUP_F32 TargetConstant:i32<0>, t40, TargetConstant:i32<0>, t46, TargetConstant:i32<0>, t44, TargetConstant:i32<0>, TargetConstant:i32<0>
				t52: i32 = EXTRACT_SUBREG t42, TargetConstant:i32<1>
				t54: i32 = EXTRACT_SUBREG t42, TargetConstant:i32<2>
				t56: i32 = S_MOV_B32 TargetConstant:i32<-1>
				t58: i32 = S_MOV_B32 TargetConstant:i32<61440>
				t62: v4i32 = REG_SEQUENCE TargetConstant:i32<46>, t52, TargetConstant:i32<1>, t54, TargetConstant:i32<2>, t56, TargetConstant:i32<3>, t58, TargetConstant:i32<4>
				t16: ch = TokenFactor t42:1, t44:1, t46:1
				t22: ch = BUFFER_STORE_DWORD_OFFSET<Mem:ST4[%out(addrspace=1)]> t41, t62, TargetConstant:i32<0>, TargetConstant:i16<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, TargetConstant:i1<0>, t16
				t20: ch = S_ENDPGM t22
				t39: ch = TokenFactor t38, t20


				Total amount of phi nodes to update: 0
				* MachineFunction at end of ISel *
				# Machine code for function fdiv_f32: IsSSA, TracksLiveness
				Function Live Ins: %SGPR0_SGPR1 in %vreg0

				BB#0: derived from LLVM BB %entry
				Live Ins: %SGPR0_SGPR1
				%vreg0<def> = COPY %SGPR0_SGPR1; SReg_64:%vreg0
				%vreg4<def> = S_LOAD_DWORDX2_IMM %vreg0, 9; mem:LD8[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant) SReg_64:%vreg4,%vreg0
				%vreg5<def> = S_LOAD_DWORD_IMM %vreg0, 11; mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant) SReg_32_XM0:%vreg5 SReg_64:%vreg0
				%vreg6<def> = S_LOAD_DWORD_IMM %vreg0, 12; mem:LD4[undef(addrspace=2)](nontemporal)(dereferenceable)(invariant) SReg_32_XM0:%vreg6 SReg_64:%vreg0
				%vreg7<def> = COPY %vreg4:sub1; SReg_32:%vreg7 SReg_64:%vreg4
				%vreg8<def> = COPY %vreg4:sub0; SReg_32:%vreg8 SReg_64:%vreg4
				%vreg9<def> = S_MOV_B32 61440; SReg_32:%vreg9
				%vreg10<def> = S_MOV_B32 -1; SReg_32:%vreg10
				%vreg11<def> = REG_SEQUENCE %vreg8<kill>, sub0, %vreg7<kill>, sub1, %vreg10<kill>, sub2, %vreg9<kill>, sub3; SReg_128:%vreg11 SReg_32:%vreg8,%vreg7,%vreg10,%vreg9
				%vreg12<def> = S_MOV_B32 1008; SReg_32:%vreg12
				S_SETREG_B32 %vreg12<kill>, 8; SReg_32:%vreg12
				%vreg13<def> = S_MOV_B32 960; SReg_32:%vreg13
				S_SETREG_B32 %vreg13<kill>, 8; SReg_32:%vreg13
				%vreg16<def> = COPY %vreg5; VGPR_32:%vreg16 SReg_32_XM0:%vreg5
				%vreg14<def>, %vreg15<def> = V_DIV_SCALE_F32 0, %vreg6, 0, %vreg6, 0, %vreg16, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg14,%vreg16 SReg_64:%vreg15 SReg_32_XM0:%vreg6
				%vreg17<def> = V_MOV_B32_e32 -2147483648, %EXEC<imp-use>; VGPR_32:%vreg17
				%vreg18<def> = V_XOR_B32_e32 %vreg14, %vreg17<kill>, %EXEC<imp-use>; VGPR_32:%vreg18,%vreg14,%vreg17
				%vreg19<def> = V_RCP_F32_e64 0, %vreg14, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg19,%vreg14
				%vreg20<def> = V_MOV_B32_e32 1065353216, %EXEC<imp-use>; VGPR_32:%vreg20
				%vreg21<def> = V_FMA_F32 0, %vreg18, 0, %vreg19, 0, %vreg20<kill>, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg21,%vreg18,%vreg19,%vreg20
				%vreg22<def> = V_FMA_F32 0, %vreg21<kill>, 0, %vreg19, 0, %vreg19, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg22,%vreg21,%vreg19,%vreg19
				%vreg25<def> = COPY %vreg6; VGPR_32:%vreg25 SReg_32_XM0:%vreg6
				%vreg23<def>, %vreg24<def> = V_DIV_SCALE_F32 0, %vreg5, 0, %vreg25, 0, %vreg5, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg23,%vreg25 SReg_64:%vreg24 SReg_32_XM0:%vreg5
				%vreg26<def> = V_MUL_F32_e64 0, %vreg23, 0, %vreg22, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg26,%vreg23,%vreg22
				%vreg27<def> = V_FMA_F32 0, %vreg18, 0, %vreg26, 0, %vreg23, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg27,%vreg18,%vreg26,%vreg23
				%vreg28<def> = V_FMA_F32 0, %vreg27<kill>, 0, %vreg22, 0, %vreg26, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg28,%vreg27,%vreg22,%vreg26
				%vreg29<def> = V_FMA_F32 0, %vreg18, 0, %vreg28, 0, %vreg23, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg29,%vreg18,%vreg28,%vreg23
				%VCC<def> = COPY %vreg24; SReg_64:%vreg24
				%vreg30<def> = V_DIV_FMAS_F32 0, %vreg29<kill>, 0, %vreg22, 0, %vreg28, 0, 0, %VCC<imp-use>, %EXEC<imp-use>; VGPR_32:%vreg30,%vreg29,%vreg22,%vreg28
				%vreg32<def> = COPY %vreg5; VGPR_32:%vreg32 SReg_32_XM0:%vreg5
				%vreg31<def> = V_DIV_FIXUP_F32 0, %vreg30<kill>, 0, %vreg6, 0, %vreg32, 0, 0, %EXEC<imp-use>; VGPR_32:%vreg31,%vreg30,%vreg32 SReg_32_XM0:%vreg6
				BUFFER_STORE_DWORD_OFFSET %vreg31<kill>, %vreg11<kill>, 0, 0, 0, 0, 0, %EXEC<imp-use>; mem:ST4[%out(addrspace=1)] VGPR_32:%vreg31 SReg_128:%vreg11
				S_ENDPGM

				# End machine code for function fdiv_f32.

				.text
				.section .AMDGPU.config
				.long 47176
				.long 11272257
				.long 47180
				.long 132
				.long 47200
				.long 0
				.long 4
				.long 0
				.long 8
				.long 0
				.text
				.globl fdiv_f32
				.p2align 8
				.type fdiv_f32,@function
				fdiv_f32: ; @fdiv_f32
				; BB#0: ; %entry
				s_load_dword s2, s[0:1], 0xb
				s_load_dword s3, s[0:1], 0xc
				s_load_dwordx2 s[4:5], s[0:1], 0x9
				s_movk_i32 s0, 0x3f0
				s_setreg_b32 hwreg(8, 0, 1), s0
				s_waitcnt lgkmcnt(0)
				v_mov_b32_e32 v0, s2
				v_div_scale_f32 v1, s[0:1], s3, s3, v0
				v_rcp_f32_e32 v2, v1
				v_xor_b32_e32 v1, 0x80000000, v1
				s_movk_i32 s0, 0x3c0
				s_mov_b32 s7, 0xf000
				v_fma_f32 v3, v1, v2, 1.0
				v_fma_f32 v2, v3, v2, v2
				v_mov_b32_e32 v3, s3
				v_div_scale_f32 v3, vcc, s2, v3, s2
				v_mul_f32_e32 v4, v2, v3
				v_fma_f32 v5, v1, v4, v3
				v_fma_f32 v4, v5, v2, v4
				v_fma_f32 v1, v1, v4, v3
				v_div_fmas_f32 v1, v1, v2, v4
				s_mov_b32 s6, -1
				v_div_fixup_f32 v0, v1, s3, v0
				s_setreg_b32 hwreg(8, 0, 1), s0
				buffer_store_dword v0, off, s[4:7], 0
				s_endpgm
				.Lfunc_end0:
				.size fdiv_f32, .Lfunc_end0-fdiv_f32

				.section .AMDGPU.csdata
				; Kernel info:
				; codeLenInByte = 144
				; NumSgprs: 10
				; NumVgprs: 6
				; FloatMode: 192
				; IeeeMode: 1
				; ScratchSize: 0
				; LDSByteSize: 0 bytes/workgroup (compile time only)
				; SGPRBlocks: 1
				; VGPRBlocks: 1
				; NumSGPRsForWavesPerEU: 10
				; NumVGPRsForWavesPerEU: 6
				; ReservedVGPRFirst: 0
				; ReservedVGPRCount: 0
				; COMPUTE_PGM_RSRC2:USER_SGPR: 2
				; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
				; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
				; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
				; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0

				.section ".note.GNU-stack"

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU : Add S_SETREG instructions to fix fdiv precision issues.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 77265

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SOPInstructions.td

lib/Target/AMDGPU/VOP3Instructions.td

test/CodeGen/AMDGPU/dump

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU : Add S_SETREG instructions to fix fdiv precision issues.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 77265

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SOPInstructions.td

lib/Target/AMDGPU/VOP3Instructions.td

test/CodeGen/AMDGPU/dump

AMDGPU : Add S_SETREG instructions to fix fdiv precision issues.
ClosedPublic