This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Fold fsub [+-0] into fneg when folding source modifiers
ClosedPublic

Authored by arsenm on Jul 18 2023, 3:35 PM.

Download Raw Diff

Details

Reviewers

foad
rampitec
Pierre-vh
b-sumner

Group Reviewers

Restricted Project

Summary

A freestanding fsub [+-]0, x isn't always folded to fneg, depending on
the denormal mode. When matching source modifiers, we're implicitly
canonicalizing the input, so we can fold it here.

Doesn't bother handling the VOP3P case since it's only relevant with
DAZ, which nobody really uses with f16.

For f64, tests show an existing bug where DAGCombiner tries to respect
the denormal mode for fsub -0, x, but not after it's lowered to
fadd -0, (fneg x).

Diff Detail

Event Timeline

arsenm created this revision.Jul 18 2023, 3:35 PM

Herald added a project: Restricted Project. · View Herald TranscriptJul 18 2023, 3:35 PM

Herald added subscribers: StephenFan, kerbowa, hiraditya and 5 others. · View Herald Transcript

arsenm requested review of this revision.Jul 18 2023, 3:35 PM

Herald added a project: Restricted Project. · View Herald TranscriptJul 18 2023, 3:35 PM

Herald added a subscriber: wdng. · View Herald Transcript

Harbormaster completed remote builds in B246374: Diff 541760.Jul 18 2023, 3:36 PM

rampitec accepted this revision.Jul 18 2023, 3:38 PM

This revision is now accepted and ready to land.Jul 18 2023, 3:38 PM

This is broken if the source modifier user is a select; we need to filter out the non-canonicalizing cases.

Filter out non-canonicalizing operations (class and select)

Herald added a subscriber: kosarev. · View Herald TranscriptJul 19 2023, 5:14 AM

arsenm requested review of this revision.Jul 19 2023, 5:14 AM

arsenm edited the summary of this revision. (Show Details)

Herald added a subscriber: steven.zhang. · View Herald TranscriptJul 19 2023, 5:15 AM

Harbormaster completed remote builds in B246515: Diff 541966.Jul 19 2023, 5:15 AM

arsenm added a child revision: D155741: AMDGPU: Implement new 2ulp fdiv lowering.Jul 19 2023, 12:34 PM

rampitec accepted this revision.Jul 19 2023, 12:50 PM

This revision is now accepted and ready to land.Jul 19 2023, 12:50 PM

fb54afd1b7a5287f521759badf0a72c5ab544ca8

arsenm mentioned this in rGfb54afd1b7a5: AMDGPU: Fold fsub [+-0] into fneg when folding source modifiers.Jul 20 2023, 4:30 PM

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPUGISel.td

4 lines

AMDGPUISelDAGToDAG.h

3 lines

AMDGPUISelDAGToDAG.cpp

33 lines

AMDGPUInstructionSelector.h

9 lines

AMDGPUInstructionSelector.cpp

49 lines

9 lines

4 lines

2 lines

2 lines

test/

CodeGen/

AMDGPU/

fneg-combines.ll

16 lines

fneg-combines.new.ll

6 lines

fsub-as-fneg-src-modifier.ll

87 lines

llvm.exp.ll

4 lines

Diff 541966

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

	Show All 25 Lines
	def gi_vop3mods0 :			def gi_vop3mods0 :
	GIComplexOperandMatcher<s32, "selectVOP3Mods0">,			GIComplexOperandMatcher<s32, "selectVOP3Mods0">,
	GIComplexPatternEquiv<VOP3Mods0>;			GIComplexPatternEquiv<VOP3Mods0>;

	def gi_vop3mods :			def gi_vop3mods :
	GIComplexOperandMatcher<s32, "selectVOP3Mods">,			GIComplexOperandMatcher<s32, "selectVOP3Mods">,
	GIComplexPatternEquiv<VOP3Mods>;			GIComplexPatternEquiv<VOP3Mods>;

				def gi_vop3modsnoncanonicalizing :
				GIComplexOperandMatcher<s32, "selectVOP3ModsNonCanonicalizing">,
				GIComplexPatternEquiv<VOP3ModsNonCanonicalizing>;

	def gi_vop3_no_mods :			def gi_vop3_no_mods :
	GIComplexOperandMatcher<s32, "selectVOP3NoMods">,			GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
	GIComplexPatternEquiv<VOP3NoMods>;			GIComplexPatternEquiv<VOP3NoMods>;

	def gi_vop3omods :			def gi_vop3omods :
	GIComplexOperandMatcher<s32, "selectVOP3OMods">,			GIComplexOperandMatcher<s32, "selectVOP3OMods">,
	GIComplexPatternEquiv<VOP3OMods>;			GIComplexPatternEquiv<VOP3OMods>;

	▲ Show 20 Lines • Show All 336 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Show First 20 Lines • Show All 210 Lines • ▼ Show 20 Lines	bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
SDValue &Offset) const;		SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;		bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;		bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,		bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
SDValue &Offset) const;		SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;		bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,		bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
		bool IsCanonicalizing = true,
bool AllowAbs = true) const;		bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;		bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
		bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
		SDValue &SrcMods) const;
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;		bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;		bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,		bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;		SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,		bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;		SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,		bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;		SDValue &Clamp, SDValue &Omod) const;
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Show First 20 Lines • Show All 2,564 Lines • ▼ Show 20 Lines	default:
break;		break;
}		}

SelectCode(N);		SelectCode(N);
}		}

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,		bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods,		unsigned &Mods,
		bool IsCanonicalizing,
bool AllowAbs) const {		bool AllowAbs) const {
Mods = 0;		Mods = 0;
Src = In;		Src = In;

if (Src.getOpcode() == ISD::FNEG) {		if (Src.getOpcode() == ISD::FNEG) {
Mods \|= SISrcMods::NEG;		Mods \|= SISrcMods::NEG;
Src = Src.getOperand(0);		Src = Src.getOperand(0);
		} else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
		// Fold fsub [+-]0 into fneg. This may not have folded depending on the
		// denormal mode, but we're implicitly canonicalizing in a source operand.
		auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
		if (LHS && LHS->isZero()) {
		Mods \|= SISrcMods::NEG;
		Src = Src.getOperand(1);
		}
}		}

if (AllowAbs && Src.getOpcode() == ISD::FABS) {		if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Mods \|= SISrcMods::ABS;		Mods \|= SISrcMods::ABS;
Src = Src.getOperand(0);		Src = Src.getOperand(0);
}		}

return true;		return true;
}		}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,		bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {		SDValue &SrcMods) const {
unsigned Mods;		unsigned Mods;
if (SelectVOP3ModsImpl(In, Src, Mods)) {		if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
		/*AllowAbs=*/true)) {
		SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
		return true;
		}

		return false;
		}

		bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
		SDValue In, SDValue &Src, SDValue &SrcMods) const {
		unsigned Mods;
		if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
		/*AllowAbs=*/true)) {
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);		SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;		return true;
}		}

return false;		return false;
}		}

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,		bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {		SDValue &SrcMods) const {
unsigned Mods;		unsigned Mods;
if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {		if (SelectVOP3ModsImpl(In, Src, Mods,
		/*IsCanonicalizing=*/true,
		/*AllowAbs=*/false)) {
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);		SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;		return true;
}		}

return false;		return false;
}		}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {		bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
if (In.getOpcode() == ISD::FABS \|\| In.getOpcode() == ISD::FNEG)		if (In.getOpcode() == ISD::FABS \|\| In.getOpcode() == ISD::FNEG)
return false;		return false;

Src = In;		Src = In;
return true;		return true;
}		}

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,		bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
SDValue &SrcMods,		SDValue &SrcMods,
bool OpSel) const {		bool OpSel) const {
unsigned Mods;		unsigned Mods;
if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {		if (SelectVOP3ModsImpl(In, Src, Mods,
		/*IsCanonicalizing=*/true,
		/*AllowAbs=*/false)) {
if (OpSel)		if (OpSel)
Mods \|= SISrcMods::OP_SEL_0;		Mods \|= SISrcMods::OP_SEL_0;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);		SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;		return true;
}		}

return false;		return false;
}		}
Show All 39 Lines	bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;		return true;
}		}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,		bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods, bool IsDOT) const {		SDValue &SrcMods, bool IsDOT) const {
unsigned Mods = 0;		unsigned Mods = 0;
Src = In;		Src = In;

		// TODO: Handle G_FSUB 0 as fneg
if (Src.getOpcode() == ISD::FNEG) {		if (Src.getOpcode() == ISD::FNEG) {
Mods ^= (SISrcMods::NEG \| SISrcMods::NEG_HI);		Mods ^= (SISrcMods::NEG \| SISrcMods::NEG_HI);
Src = Src.getOperand(0);		Src = Src.getOperand(0);
}		}

if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&		if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
(!IsDOT \|\| !Subtarget->hasDOTOpSelHazard())) {		(!IsDOT \|\| !Subtarget->hasDOTOpSelHazard())) {
unsigned VecMods = Mods;		unsigned VecMods = Mods;
▲ Show 20 Lines • Show All 307 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines	private:
bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;		bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;		bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
bool selectBufferLoadLds(MachineInstr &MI) const;		bool selectBufferLoadLds(MachineInstr &MI) const;
bool selectGlobalLoadLds(MachineInstr &MI) const;		bool selectGlobalLoadLds(MachineInstr &MI) const;
bool selectBVHIntrinsic(MachineInstr &I) const;		bool selectBVHIntrinsic(MachineInstr &I) const;
bool selectSMFMACIntrin(MachineInstr &I) const;		bool selectSMFMACIntrin(MachineInstr &I) const;
bool selectWaveAddress(MachineInstr &I) const;		bool selectWaveAddress(MachineInstr &I) const;

std::pair<Register, unsigned>		std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true,		bool IsCanonicalizing = true,
		bool AllowAbs = true,
bool OpSel = false) const;		bool OpSel = false) const;

Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods,		Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods,
MachineOperand Root, MachineInstr *InsertPt,		MachineOperand Root, MachineInstr *InsertPt,
bool ForceVGPR = false) const;		bool ForceVGPR = false) const;

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;		selectVCSRC(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVSRC0(MachineOperand &Root) const;		selectVSRC0(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;		selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVOP3BMods0(MachineOperand &Root) const;		selectVOP3BMods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;		selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;		selectVOP3Mods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
		selectVOP3ModsNonCanonicalizing(MachineOperand &Root) const;
		InstructionSelector::ComplexRendererFns
selectVOP3BMods(MachineOperand &Root) const;		selectVOP3BMods(MachineOperand &Root) const;

ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;		ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;

std::pair<Register, unsigned>		std::pair<Register, unsigned>
selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI,		selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI,
bool IsDOT = false) const;		bool IsDOT = false) const;

▲ Show 20 Lines • Show All 174 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Show First 20 Lines • Show All 3,480 Lines • ▼ Show 20 Lines
InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
return {{		return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); }		[=](MachineInstrBuilder &MIB) { MIB.add(Root); }
}};		}};

}		}

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(		std::pair<Register, unsigned>
MachineOperand &Root, bool AllowAbs, bool OpSel) const {		AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
		bool IsCanonicalizing,
		bool AllowAbs, bool OpSel) const {
Register Src = Root.getReg();		Register Src = Root.getReg();
unsigned Mods = 0;		unsigned Mods = 0;
MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);		MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);

if (MI->getOpcode() == AMDGPU::G_FNEG) {		if (MI->getOpcode() == AMDGPU::G_FNEG) {
Src = MI->getOperand(1).getReg();		Src = MI->getOperand(1).getReg();
Mods \|= SISrcMods::NEG;		Mods \|= SISrcMods::NEG;
MI = getDefIgnoringCopies(Src, *MRI);		MI = getDefIgnoringCopies(Src, *MRI);
		} else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
		// Fold fsub [+-]0 into fneg. This may not have folded depending on the
		// denormal mode, but we're implicitly canonicalizing in a source operand.
		const ConstantFP *LHS =
		getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI);
		if (LHS && LHS->isZero()) {
		Mods \|= SISrcMods::NEG;
		Src = MI->getOperand(2).getReg();
		}
}		}

if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {		if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
Src = MI->getOperand(1).getReg();		Src = MI->getOperand(1).getReg();
Mods \|= SISrcMods::ABS;		Mods \|= SISrcMods::ABS;
}		}

if (OpSel)		if (OpSel)
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	return {{
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod		[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
}};		}};
}		}

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
Register Src;		Register Src;
unsigned Mods;		unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);		std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
		/*IsCanonicalizing=*/true,
		/*AllowAbs=*/false);

return {{		return {{
[=](MachineInstrBuilder &MIB) {		[=](MachineInstrBuilder &MIB) {
MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));		MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
},		},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp		[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod		[=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
Show All 19 Lines	return {{
[=](MachineInstrBuilder &MIB) {		[=](MachineInstrBuilder &MIB) {
MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));		MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
},		},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};		}};
}		}

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
		AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
		MachineOperand &Root) const {
		Register Src;
		unsigned Mods;
		std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);

		return {{
		[=](MachineInstrBuilder &MIB) {
		MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
		},
		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
		}};
		}

		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
Register Src;		Register Src;
unsigned Mods;		unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);		std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
		/*AllowAbs=*/false);

return {{		return {{
[=](MachineInstrBuilder &MIB) {		[=](MachineInstrBuilder &MIB) {
MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));		MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
},		},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};		}};
}		}
Show All 19 Lines	if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
// It's possible to see an f32 fneg here, but unlikely.		// It's possible to see an f32 fneg here, but unlikely.
// TODO: Treat f32 fneg as only high bit.		// TODO: Treat f32 fneg as only high bit.
MRI.getType(Src) == LLT::fixed_vector(2, 16)) {		MRI.getType(Src) == LLT::fixed_vector(2, 16)) {
Mods ^= (SISrcMods::NEG \| SISrcMods::NEG_HI);		Mods ^= (SISrcMods::NEG \| SISrcMods::NEG_HI);
Src = MI->getOperand(1).getReg();		Src = MI->getOperand(1).getReg();
MI = MRI.getVRegDef(Src);		MI = MRI.getVRegDef(Src);
}		}

		// TODO: Handle G_FSUB 0 as fneg

// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.		// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
(void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()		(void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()

// Packed instructions do not have abs modifiers.		// Packed instructions do not have abs modifiers.
Mods \|= SISrcMods::OP_SEL_1;		Mods \|= SISrcMods::OP_SEL_1;

return std::pair(Src, Mods);		return std::pair(Src, Mods);
}		}
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
}};		}};
}		}

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
Register Src;		Register Src;
unsigned Mods;		unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,		std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
/* AllowAbs */ false,		/*IsCanonicalizing=*/true,
/* OpSel */ false);		/*AllowAbs=*/false,
		/*OpSel=*/false);

return {{		return {{
[=](MachineInstrBuilder &MIB) {		[=](MachineInstrBuilder &MIB) {
MIB.addReg(		MIB.addReg(
copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));		copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
},		},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
}};		}};
}		}

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
Register Src;		Register Src;
unsigned Mods;		unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,		std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
/* AllowAbs */ false,		/*IsCanonicalizing=*/true,
/* OpSel */ true);		/*AllowAbs=*/false,
		/*OpSel=*/true);

return {{		return {{
[=](MachineInstrBuilder &MIB) {		[=](MachineInstrBuilder &MIB) {
MIB.addReg(		MIB.addReg(
copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));		copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
},		},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods		[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
}};		}};
▲ Show 20 Lines • Show All 1,296 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstrInfo.td

	Show First 20 Lines • Show All 1,334 Lines • ▼ Show 20 Lines

	def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;			def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
	def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;			def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
	def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;			def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

	def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;			def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

	def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;			def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

				// Modifiers for floating point instructions.
	def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;			def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

				// VOP3 modifiers used for instructions that do not read canonicalized
				// floating point values (i.e. integer operations with FP source
				// modifiers)
				def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
				"SelectVOP3ModsNonCanonicalizing">;

	def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;			def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

	def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;			def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

	def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;			def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

	def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;			def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
	def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;			def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
	▲ Show 20 Lines • Show All 1,528 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 1,122 Lines • ▼ Show 20 Lines	def : GCNPat <
(f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),		(f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
(VOP3Mods f32:$src1, i32:$src1_mod)),		(VOP3Mods f32:$src1, i32:$src1_mod)),
(VOP3Mods f32:$src2, i32:$src2_mod))),		(VOP3Mods f32:$src2, i32:$src2_mod))),
(V_MAD_LEGACY_F32_e64 $src0_mod, $src0, $src1_mod, $src1,		(V_MAD_LEGACY_F32_e64 $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)		$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;		>;

class VOPSelectModsPat <ValueType vt> : GCNPat <		class VOPSelectModsPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),		(vt (select i1:$src0, (VOP3ModsNonCanonicalizing vt:$src1, i32:$src1_mods),
(VOP3Mods vt:$src2, i32:$src2_mods))),		(VOP3ModsNonCanonicalizing vt:$src2, i32:$src2_mods))),
(V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,		(V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)		FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
>;		>;

class VOPSelectPat <ValueType vt> : GCNPat <		class VOPSelectPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),		(vt (select i1:$src0, vt:$src1, vt:$src2)),
(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)		(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
>;		>;
▲ Show 20 Lines • Show All 2,464 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Show First 20 Lines • Show All 825 Lines • ▼ Show 20 Lines	def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;		let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}		}
}		}

class getVOPCClassPat64 <VOPProfile P> {		class getVOPCClassPat64 <VOPProfile P> {
list<dag> ret =		list<dag> ret =
[(set i1:$sdst,		[(set i1:$sdst,
(AMDGPUfp_class		(AMDGPUfp_class
(P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers)),		(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
i32:$src1))];		i32:$src1))];
}		}


// Special case for class instructions which only have modifiers on		// Special case for class instructions which only have modifiers on
// the 1st source operand.		// the 1st source operand.
multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,		multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
bit DefVcc = 1> {		bit DefVcc = 1> {
▲ Show 20 Lines • Show All 1,581 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/VOPInstructions.td

	Show First 20 Lines • Show All 1,481 Lines • ▼ Show 20 Lines
	include "VOPCInstructions.td"			include "VOPCInstructions.td"
	include "VOP1Instructions.td"			include "VOP1Instructions.td"
	include "VOP2Instructions.td"			include "VOP2Instructions.td"
	include "VOP3Instructions.td"			include "VOP3Instructions.td"
	include "VOP3PInstructions.td"			include "VOP3PInstructions.td"
	include "VOPDInstructions.td"			include "VOPDInstructions.td"

	class ClassPat<Instruction inst, ValueType vt> : GCNPat <			class ClassPat<Instruction inst, ValueType vt> : GCNPat <
	(is_fpclass (vt (VOP3Mods vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),			(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
	(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))			(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
	>;			>;

	def : ClassPat<V_CMP_CLASS_F16_e64, f16> {			def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
	let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];			let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];
	}			}

	def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {			def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {
	Show All 39 Lines

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

Show First 20 Lines • Show All 2,602 Lines • ▼ Show 20 Lines	bb:
%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)		%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
%i4 = fadd fast <2 x float> %i3, %arg		%i4 = fadd fast <2 x float> %i3, %arg
%i5 = fneg <2 x float> %i4		%i5 = fneg <2 x float> %i4
%i6 = fmul fast <2 x float> %i5, %arg2		%i6 = fmul fast <2 x float> %i5, %arg2
ret <2 x float> %i6		ret <2 x float> %i6
}		}

; This expects denormal flushing, so can't turn this fmul into fneg		; This expects denormal flushing, so can't turn this fmul into fneg
; TODO: Keeping this as fmul saves encoding size
; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:		; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
; GCN: v_sub_f32_e32 [[TMP:v[0-9]+]], 0x80000000, v0		; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1		; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {		define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%mul = fmul float %x, -1.0		%mul = fmul float %x, -1.0
%add = fmul nnan float %mul, %y		%add = fmul nnan float %mul, %y
ret float %add		ret float %add
}		}

; It's legal to turn this fmul into an fneg since denormals are		; It's legal to turn this fmul into an fneg since denormals are
; preserved and we know an snan can't happen from the flag.		; preserved and we know an snan can't happen from the flag.
; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:		; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e64 v0, -v0, v1		; GCN: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: s_setpc_b64		; GCN-NEXT: s_setpc_b64
define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {		define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
%mul = fmul nnan float %x, -1.0		%mul = fmul nnan float %x, -1.0
%add = fmul float %mul, %y		%add = fmul float %mul, %y
ret float %add		ret float %add
}		}

; know the source can't be an snan		; know the source can't be an snan
; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:		; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0		; GCN: s_waitcnt
; GCN: v_mul_f32_e32 v0, [[TMP]], v1		; GCN-NEXT: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
		; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
; GCN-NEXT: s_setpc_b64		; GCN-NEXT: s_setpc_b64
define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {		define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
%canonical = fmul float %x, %x		%canonical = fmul float %x, %x
%mul = fmul float %canonical, -1.0		%mul = fmul float %canonical, -1.0
%add = fmul float %mul, %y		%add = fmul float %mul, %y
ret float %add		ret float %add
}		}

; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:		; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], 1.0, v0		; GCN: s_waitcnt
; GCN: v_sub_f32_e32 [[TMP1:v[0-9]+]], 0x80000000, [[TMP0]]		; GCN-NEXT: v_mul_f32_e32 [[TMP:v[0-9]+]], 1.0, v0
; GCN-NEXT: v_mul_f32_e32 v0, [[TMP1]], v1		; GCN-NEXT: v_mul_f32_e64 v0, -[[TMP]], v1
define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {		define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%quiet = call float @llvm.canonicalize.f32(float %x)		%quiet = call float @llvm.canonicalize.f32(float %x)
%mul = fmul float %quiet, -1.0		%mul = fmul float %quiet, -1.0
%add = fmul float %mul, %y		%add = fmul float %mul, %y
ret float %add		ret float %add
}		}

; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f32:		; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f32:
▲ Show 20 Lines • Show All 186 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Show First 20 Lines • Show All 2,635 Lines • ▼ Show 20 Lines
}		}

; This expects denormal flushing, so can't turn this fmul into fneg		; This expects denormal flushing, so can't turn this fmul into fneg
; TODO: Keeping this as fmul saves encoding size		; TODO: Keeping this as fmul saves encoding size
define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {		define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
; GCN-LABEL: nnan_fmul_neg1_to_fneg:		; GCN-LABEL: nnan_fmul_neg1_to_fneg:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]		; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %x, -1.0		%mul = fmul float %x, -1.0
%add = fmul nnan float %mul, %y		%add = fmul nnan float %mul, %y
ret float %add		ret float %add
}		}

; It's legal to turn this fmul into an fneg since denormals are		; It's legal to turn this fmul into an fneg since denormals are
; preserved and we know an snan can't happen from the flag.		; preserved and we know an snan can't happen from the flag.
Show All 22 Lines	; GCN-NEXT: s_setpc_b64 s[30:31]
ret float %add		ret float %add
}		}

define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {		define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
; GCN-LABEL: flush_snan_fmul_neg1_to_fneg:		; GCN-LABEL: flush_snan_fmul_neg1_to_fneg:
; GCN: ; %bb.0:		; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0		; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]		; GCN-NEXT: s_setpc_b64 s[30:31]
%quiet = call float @llvm.canonicalize.f32(float %x)		%quiet = call float @llvm.canonicalize.f32(float %x)
%mul = fmul float %quiet, -1.0		%mul = fmul float %quiet, -1.0
%add = fmul float %mul, %y		%add = fmul float %mul, %y
ret float %add		ret float %add
}		}

define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {		define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
▲ Show 20 Lines • Show All 756 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub float %v0, -0.0		%sub = fsub float %v0, -0.0
%mul = fmul float %sub, %v1		%mul = fmul float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 {		define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 {
; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
; CHECK: ; %bb.0:		; SDAG: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v0, 0, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1		; SDAG-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: s_setpc_b64 s[30:31]		;
		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
		; GISEL: ; %bb.0:
		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
		; GISEL-NEXT: v_sub_f32_e32 v0, 0, v0
		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub float 0.0, %v0		%sub = fsub float 0.0, %v0
%mul = fmul float %sub, %v1		%mul = fmul float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 {		define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 {
; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
; CHECK: ; %bb.0:		; SDAG: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v0, 0, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1		; SDAG-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: s_setpc_b64 s[30:31]		;
		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
		; GISEL: ; %bb.0:
		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
		; GISEL-NEXT: v_sub_f32_e32 v0, 0, v0
		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub float 0.0, %v0		%sub = fsub float 0.0, %v0
%mul = fmul float %sub, %v1		%mul = fmul float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 {		define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:		; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
Show All 30 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %sub, %v1		%mul = fmul float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {		define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz:		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
Show All 19 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nsz float %sub, %v1		%mul = fmul nsz float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 {		define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub nsz float -0.0, %v0		%sub = fsub nsz float -0.0, %v0
%mul = fmul nsz float %sub, %v1		%mul = fmul nsz float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {		define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub float -0.0, %v0		%sub = fsub float -0.0, %v0
%mul = fmul float %sub, %v1		%mul = fmul float %sub, %v1
ret float %mul		ret float %mul
}		}

define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 {		define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:		; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:		; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
Show All 22 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x float> %sub, %v1		%mul = fmul <2 x float> %sub, %v1
ret <2 x float> %mul		ret <2 x float> %mul
}		}

define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 {		define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:		; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:		; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1		; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
Show All 24 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nsz <2 x float> %sub, %v1		%mul = fmul nsz <2 x float> %sub, %v1
ret <2 x float> %mul		ret <2 x float> %mul
}		}

define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 {		define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:		; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:		; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1		; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3		; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0		%sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0
%mul = fmul nsz <2 x float> %sub, %v1		%mul = fmul nsz <2 x float> %sub, %v1
ret <2 x float> %mul		ret <2 x float> %mul
}		}

define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 {		define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:		; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:		; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1		; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3		; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub <2 x float> <float -0.0, float -0.0>, %v0		%sub = fsub <2 x float> <float -0.0, float -0.0>, %v0
%mul = fmul <2 x float> %sub, %v1		%mul = fmul <2 x float> %sub, %v1
ret <2 x float> %mul		ret <2 x float> %mul
}		}

define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 {		define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:		; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1		; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0		; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
; SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:		; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1		; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2		; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
Show All 22 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %sub, %v1		%mul = fmul half %sub, %v1
ret half %mul		ret half %mul
}		}

define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {		define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz:		; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0		; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz:		; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
Show All 19 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul nsz half %sub, %v1		%mul = fmul nsz half %sub, %v1
ret half %mul		ret half %mul
}		}

define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {		define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:		; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0		; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:		; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub nsz half -0.0, %v0		%sub = fsub nsz half -0.0, %v0
%mul = fmul nsz half %sub, %v1		%mul = fmul nsz half %sub, %v1
ret half %mul		ret half %mul
}		}

define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {		define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:		; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0		; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:		; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
%sub = fsub half -0.0, %v0		%sub = fsub half -0.0, %v0
%mul = fmul half %sub, %v1		%mul = fmul half %sub, %v1
ret half %mul		ret half %mul
}		}

define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 {		define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:		; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0		; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
; SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:		; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1		; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]		; GISEL-NEXT: s_setpc_b64 s[30:31]
▲ Show 20 Lines • Show All 835 Lines • ▼ Show 20 Lines	; GISEL-NEXT: s_setpc_b64 s[30:31]
%p0_0 = call float @llvm.amdgcn.interp.p1(float %sub, i32 0, i32 0, i32 %v1)		%p0_0 = call float @llvm.amdgcn.interp.p1(float %sub, i32 0, i32 0, i32 %v1)
ret float %p0_0		ret float %p0_0
}		}

define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 {		define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:		; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
; SDAG: ; %bb.0:		; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
; SDAG-NEXT: s_mov_b32 m0, s4		; SDAG-NEXT: s_mov_b32 m0, s4
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3		; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; SDAG-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y		; SDAG-NEXT: v_interp_p1ll_f16 v0, -v0, attr2.y
; SDAG-NEXT: s_setpc_b64 s[30:31]		; SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:		; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
; GISEL: ; %bb.0:		; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0		; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
; GISEL-NEXT: s_mov_b32 m0, s4		; GISEL-NEXT: s_mov_b32 m0, s4
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3		; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
Show All 21 Lines

llvm/test/CodeGen/AMDGPU/llvm.exp.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,787 Lines • ▼ Show 20 Lines	; CM-NEXT: PAD
ret float %result		ret float %result
}		}

define float @v_exp_f32_undef() {		define float @v_exp_f32_undef() {
; VI-SDAG-LABEL: v_exp_f32_undef:		; VI-SDAG-LABEL: v_exp_f32_undef:
; VI-SDAG: ; %bb.0:		; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0		; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0
; VI-SDAG-NEXT: v_sub_f32_e32 v1, 0, v0		; VI-SDAG-NEXT: s_mov_b32 s4, 0x7fc00000
; VI-SDAG-NEXT: v_add_f32_e32 v1, 0x7fc00000, v1		; VI-SDAG-NEXT: v_add_f32_e64 v1, -v0, s4
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1		; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0		; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0		; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]		; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;		;
; VI-GISEL-LABEL: v_exp_f32_undef:		; VI-GISEL-LABEL: v_exp_f32_undef:
; VI-GISEL: ; %bb.0:		; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
▲ Show 20 Lines • Show All 1,840 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Fold fsub [+-0] into fneg when folding source modifiers
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 541966

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/lib/Target/AMDGPU/SIInstrInfo.td

llvm/lib/Target/AMDGPU/SIInstructions.td

llvm/lib/Target/AMDGPU/VOPCInstructions.td

llvm/lib/Target/AMDGPU/VOPInstructions.td

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll

llvm/test/CodeGen/AMDGPU/llvm.exp.ll

AMDGPU: Fold fsub [+-0] into fneg when folding source modifiers
ClosedPublic