This is an archive of the discontinued LLVM Phabricator instance.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4658	This seems a bit of a hack. Can we at least get a comment saying this is working around the model somehow? Can Sanjay comment on how the model was meant to be used where targets have native step support?
4681	Why do you need this? Is this to preserve sign-of-zero? You don't need this if the original operation was marked nsz or fast.

This revision now requires changes to proceed. Nov 11 2016, 12:44 AM

spatel added inline comments. Nov 11 2016, 7:10 AM

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4658	There might be a better API, but this is more or less what I imagined if you want to disable the generic refinement: /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate.

evandro added inline comments. Nov 11 2016, 8:37 AM

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4681	I'm not sure what you're pointing out here, so allow me to be verbose. When taking `sqrt(0.0)`, the instruction sequence above ends up with a `NaN`. Therefore, `0.0` is a special case when the result is hamfisted for correctness. Instead of materializing `0.0`, which requires extra instructions, since the argument is known to be `0.0`, I chose to use it instead. Yes, as a result its sign is preserved, but this is not the reason why I consider this to be a more sensible choice.

All concerns addressed from my side.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4681	Ah, I understand now, thanks!

This revision is now accepted and ready to land. Nov 11 2016, 9:01 AM

Closed by commit rL286907: [AArch64] Compute the Newton series for reciprocals natively (authored by evandro). · Explain Why · Nov 14 2016, 3:38 PM

This revision was automatically updated to reflect the committed changes.

evandro marked 5 inline comments as done.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.h

6 lines

AArch64ISelLowering.cpp

51 lines

AArch64InstrInfo.td

24 lines

test/

CodeGen/

AArch64/

recp-fastmath.ll

34 lines

sqrt-fastmath.ll

93 lines

Diff 77536

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 181 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
/// generated to compensate for the byte-swapping. But sometimes we do		/// generated to compensate for the byte-swapping. But sometimes we do
/// need to re-interpret the data in SIMD vector registers in big-endian		/// need to re-interpret the data in SIMD vector registers in big-endian
/// mode without emitting such REV instructions.		/// mode without emitting such REV instructions.
NVCAST,		NVCAST,

SMULL,		SMULL,
UMULL,		UMULL,

// Reciprocal estimates.		// Reciprocal estimates and steps.
FRECPE,		FRECPE, FRECPS,
FRSQRTE,		FRSQRTE, FRSQRTS,

// NEON Load/Store with post-increment base updates		// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,		LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,		LD3post,
LD4post,		LD4post,
ST2post,		ST2post,
ST3post,		ST3post,
ST4post,		ST4post,
▲ Show 20 Lines • Show All 402 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 953 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";		case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";		case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";		case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";		case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";		case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";		case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";		case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";		case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";		case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
		case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
		case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
		case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
}		}
return nullptr;		return nullptr;
}		}

MachineBasicBlock *		MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,		AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {		MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a		// We materialise the F128CSEL pseudo-instruction as some control flow and a
▲ Show 20 Lines • Show All 3,676 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,		SelectionDAG &DAG, int Enabled,
int &ExtraSteps,		int &ExtraSteps,
bool &UseOneConst,		bool &UseOneConst,
bool Reciprocal) const {		bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||		if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))		(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,		if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {		DAG, ExtraSteps)) {
UseOneConst = true;		SDLoc DL(Operand);
		jmolloy (inline comment, marked done): This seems a bit of a hack. Can we at least get a comment saying this is working around the model somehow? Can Sanjay comment on how the model was meant to be used where targets have native step support?
		spatel (inline comment, marked done): There might be a better API, but this is more or less what I imagined if you want to disable the generic refinement: /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate.
		EVT VT = Operand.getValueType();

		SDNodeFlags Flags;
		Flags.setUnsafeAlgebra(true);

		// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
		// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
		for (int i = ExtraSteps; i > 0; --i) {
		SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
		&Flags);
		Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags);
		Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
		}

		if (!Reciprocal) {
		EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
		VT);
		SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
		SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);

		Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags);
		// Correct the result if the operand is 0.0.
		Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
		jmolloy (inline comment, marked done): Why do you need this? Is this to preserve sign-of-zero? You don't need this if the original operation was marked nsz or fast.
		evandro (author; inline comment, marked done): I'm not sure what you're pointing out here, so allow me to be verbose. When taking `sqrt(0.0)`, the instruction sequence above ends up with a `NaN`. Therefore, `0.0` is a special case when the result is hamfisted for correctness. Instead of materializing `0.0`, which requires extra instructions, since the argument is known to be `0.0`, I chose to use it instead. Yes, as a result its sign is preserved, but this is not the reason why I consider this to be a more sensible choice.
		jmolloy (inline comment, marked done): Ah, I understand now, thanks!
		VT, Eq, Operand, Estimate);
		}

		ExtraSteps = 0;
return Estimate;		return Estimate;
}		}

return SDValue();		return SDValue();
}		}

SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,		SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,		SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {		int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)		if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,		if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps))		DAG, ExtraSteps)) {
		SDLoc DL(Operand);
		EVT VT = Operand.getValueType();

		SDNodeFlags Flags;
		Flags.setUnsafeAlgebra(true);

		// Newton reciprocal iteration: E * (2 - X * E)
		// AArch64 reciprocal iteration instruction: (2 - M * N)
		for (int i = ExtraSteps; i > 0; --i) {
		SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
		Estimate, &Flags);
		Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
		}

		ExtraSteps = 0;
return Estimate;		return Estimate;
		}

return SDValue();		return SDValue();
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support		// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

▲ Show 20 Lines • Show All 5,784 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 281 Lines • ▼ Show 20 Lines
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;		def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,		def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;		SDTCisSameAs<1, 2>]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;		def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;		def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;

def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;		def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
		def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;		def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
		def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;		def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;		def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;		def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;		def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;		def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;		def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

▲ Show 20 Lines • Show All 3,118 Lines • ▼ Show 20 Lines	def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
(FRECPEv4f32 FPR128:$Rn)>;		(FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),		def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;		(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),		def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;		(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),		def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
(FRECPEv2f64 FPR128:$Rn)>;		(FRECPEv2f64 FPR128:$Rn)>;

		def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
		(FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
		def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
		(FRECPSv2f32 V64:$Rn, V64:$Rm)>;
		def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
		(FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
		def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
		(FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
		def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
		(FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),		def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
(FRECPXv1i32 FPR32:$Rn)>;		(FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),		def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
(FRECPXv1i64 FPR64:$Rn)>;		(FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),		def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
(FRSQRTEv1i32 FPR32:$Rn)>;		(FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),		def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
Show All 9 Lines	def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
(FRSQRTEv4f32 FPR128:$Rn)>;		(FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),		def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;		(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),		def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;		(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),		def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
(FRSQRTEv2f64 FPR128:$Rn)>;		(FRSQRTEv2f64 FPR128:$Rn)>;

		def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
		(FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
		def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
		(FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
		def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
		(FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
		def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
		(FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
		def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
		(FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// If an integer is about to be converted to a floating point value,		// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.		// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.		// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.		// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,		multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
SDPatternOperator loadop, Instruction UCVTF,		SDPatternOperator loadop, Instruction UCVTF,
ROAddrMode ro, Instruction LDRW, Instruction LDRX,		ROAddrMode ro, Instruction LDRW, Instruction LDRX,
SubRegIndex sub> {		SubRegIndex sub> {
▲ Show 20 Lines • Show All 2,649 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/recp-fastmath.ll

	Show All 10 Lines
	}			}

	define float @frecp1(float %x) #1 {			define float @frecp1(float %x) #1 {
	%div = fdiv fast float 1.0, %x			%div = fdiv fast float 1.0, %x
	ret float %div			ret float %div

	; CHECK-LABEL: frecp1:			; CHECK-LABEL: frecp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecpe [[R:s[0-7]]]
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecps {{s[0-7](, s[0-7])?}}, [[R]]
	}			}

	define <2 x float> @f2recp0(<2 x float> %x) #0 {			define <2 x float> @f2recp0(<2 x float> %x) #0 {
	%div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x			%div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
	ret <2 x float> %div			ret <2 x float> %div

	; CHECK-LABEL: f2recp0:			; CHECK-LABEL: f2recp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define <2 x float> @f2recp1(<2 x float> %x) #1 {			define <2 x float> @f2recp1(<2 x float> %x) #1 {
	%div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x			%div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
	ret <2 x float> %div			ret <2 x float> %div

	; CHECK-LABEL: f2recp1:			; CHECK-LABEL: f2recp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecpe [[R:v[0-7]\.2s]]
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecps {{v[0-7]\.2s(, v[0-7].2s)?}}, [[R]]
	}			}

	define <4 x float> @f4recp0(<4 x float> %x) #0 {			define <4 x float> @f4recp0(<4 x float> %x) #0 {
	%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x			%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
	ret <4 x float> %div			ret <4 x float> %div

	; CHECK-LABEL: f4recp0:			; CHECK-LABEL: f4recp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define <4 x float> @f4recp1(<4 x float> %x) #1 {			define <4 x float> @f4recp1(<4 x float> %x) #1 {
	%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x			%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
	ret <4 x float> %div			ret <4 x float> %div

	; CHECK-LABEL: f4recp1:			; CHECK-LABEL: f4recp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]]
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[R]]
	}			}

	define <8 x float> @f8recp0(<8 x float> %x) #0 {			define <8 x float> @f8recp0(<8 x float> %x) #0 {
	%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x			%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
	ret <8 x float> %div			ret <8 x float> %div

	; CHECK-LABEL: f8recp0:			; CHECK-LABEL: f8recp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define <8 x float> @f8recp1(<8 x float> %x) #1 {			define <8 x float> @f8recp1(<8 x float> %x) #1 {
	%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x			%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
	ret <8 x float> %div			ret <8 x float> %div

	; CHECK-LABEL: f8recp1:			; CHECK-LABEL: f8recp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecpe [[RA:v[0-7]\.4s]]
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecpe [[RB:v[0-7]\.4s]]
	; CHECK: frecpe			; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[RA]]
				; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[RB]]
	}			}

	define double @drecp0(double %x) #0 {			define double @drecp0(double %x) #0 {
	%div = fdiv fast double 1.0, %x			%div = fdiv fast double 1.0, %x
	ret double %div			ret double %div

	; CHECK-LABEL: drecp0:			; CHECK-LABEL: drecp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define double @drecp1(double %x) #1 {			define double @drecp1(double %x) #1 {
	%div = fdiv fast double 1.0, %x			%div = fdiv fast double 1.0, %x
	ret double %div			ret double %div

	; CHECK-LABEL: drecp1:			; CHECK-LABEL: drecp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecpe [[R:d[0-7]]]
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecps {{d[0-7](, d[0-7])?}}, [[R]]
	}			}

	define <2 x double> @d2recp0(<2 x double> %x) #0 {			define <2 x double> @d2recp0(<2 x double> %x) #0 {
	%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x			%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
	ret <2 x double> %div			ret <2 x double> %div

	; CHECK-LABEL: d2recp0:			; CHECK-LABEL: d2recp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define <2 x double> @d2recp1(<2 x double> %x) #1 {			define <2 x double> @d2recp1(<2 x double> %x) #1 {
	%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x			%div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
	ret <2 x double> %div			ret <2 x double> %div

	; CHECK-LABEL: d2recp1:			; CHECK-LABEL: d2recp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]]
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[R]]
	}			}

	define <4 x double> @d4recp0(<4 x double> %x) #0 {			define <4 x double> @d4recp0(<4 x double> %x) #0 {
	%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x			%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x
	ret <4 x double> %div			ret <4 x double> %div

	; CHECK-LABEL: d4recp0:			; CHECK-LABEL: d4recp0:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: fmov
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	; CHECK-NEXT: fdiv			; CHECK-NEXT: fdiv
	}			}

	define <4 x double> @d4recp1(<4 x double> %x) #1 {			define <4 x double> @d4recp1(<4 x double> %x) #1 {
	%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x			%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %x
	ret <4 x double> %div			ret <4 x double> %div

	; CHECK-LABEL: d4recp1:			; CHECK-LABEL: d4recp1:
	; CHECK-NEXT: BB#0			; CHECK-NEXT: BB#0
	; CHECK-NEXT: fmov			; CHECK-NEXT: frecpe [[RA:v[0-7]\.2d]]
	; CHECK-NEXT: frecpe			; CHECK-NEXT: frecpe [[RB:v[0-7]\.2d]]
	; CHECK: frecpe			; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[RA]]
				; CHECK: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[RB]]
	}			}

	attributes #0 = { nounwind "unsafe-fp-math"="true" }			attributes #0 = { nounwind "unsafe-fp-math"="true" }
	attributes #1 = { nounwind "unsafe-fp-math"="true" "reciprocal-estimates"="div,vec-div" }			attributes #1 = { nounwind "unsafe-fp-math"="true" "reciprocal-estimates"="div,vec-div" }

llvm/test/CodeGen/AArch64/sqrt-fastmath.ll

Show All 13 Lines	define float @fsqrt(float %a) #0 {
ret float %1		ret float %1

; FAULT-LABEL: fsqrt:		; FAULT-LABEL: fsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: fsqrt:		; CHECK-LABEL: fsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
		; CHECK: fcmp s0, #0
}		}

define <2 x float> @f2sqrt(<2 x float> %a) #0 {		define <2 x float> @f2sqrt(<2 x float> %a) #0 {
%1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)		%1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
ret <2 x float> %1		ret <2 x float> %1

; FAULT-LABEL: f2sqrt:		; FAULT-LABEL: f2sqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f2sqrt:		; CHECK-LABEL: f2sqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
; CHECK-NEXT: mov		; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
		; CHECK: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}		}

define <4 x float> @f4sqrt(<4 x float> %a) #0 {		define <4 x float> @f4sqrt(<4 x float> %a) #0 {
%1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)		%1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %1		ret <4 x float> %1

; FAULT-LABEL: f4sqrt:		; FAULT-LABEL: f4sqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f4sqrt:		; CHECK-LABEL: f4sqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: mov		; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
		; CHECK: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}		}

define <8 x float> @f8sqrt(<8 x float> %a) #0 {		define <8 x float> @f8sqrt(<8 x float> %a) #0 {
%1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)		%1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
ret <8 x float> %1		ret <8 x float> %1

; FAULT-LABEL: f8sqrt:		; FAULT-LABEL: f8sqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f8sqrt:		; CHECK-LABEL: f8sqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: mov		; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrte		; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
; CHECK: frsqrte		; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
}		}

define double @dsqrt(double %a) #0 {		define double @dsqrt(double %a) #0 {
%1 = tail call fast double @llvm.sqrt.f64(double %a)		%1 = tail call fast double @llvm.sqrt.f64(double %a)
ret double %1		ret double %1

; FAULT-LABEL: dsqrt:		; FAULT-LABEL: dsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: dsqrt:		; CHECK-LABEL: dsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
		; CHECK: fcmp d0, #0
}		}

define <2 x double> @d2sqrt(<2 x double> %a) #0 {		define <2 x double> @d2sqrt(<2 x double> %a) #0 {
%1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)		%1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
ret <2 x double> %1		ret <2 x double> %1

; FAULT-LABEL: d2sqrt:		; FAULT-LABEL: d2sqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: d2sqrt:		; CHECK-LABEL: d2sqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: mov		; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
		; CHECK: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}		}

define <4 x double> @d4sqrt(<4 x double> %a) #0 {		define <4 x double> @d4sqrt(<4 x double> %a) #0 {
%1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)		%1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %1		ret <4 x double> %1

; FAULT-LABEL: d4sqrt:		; FAULT-LABEL: d4sqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: d4sqrt:		; CHECK-LABEL: d4sqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: mov		; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrte		; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
; CHECK: frsqrte		; CHECK: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
}		}

define float @frsqrt(float %a) #0 {		define float @frsqrt(float %a) #0 {
%1 = tail call fast float @llvm.sqrt.f32(float %a)		%1 = tail call fast float @llvm.sqrt.f32(float %a)
%2 = fdiv fast float 1.000000e+00, %1		%2 = fdiv fast float 1.000000e+00, %1
ret float %2		ret float %2

; FAULT-LABEL: frsqrt:		; FAULT-LABEL: frsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: frsqrt:		; CHECK-LABEL: frsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
		; CHECK-NOT: fcmp {{s[0-7]}}, #0
}		}

define <2 x float> @f2rsqrt(<2 x float> %a) #0 {		define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
%1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)		%1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
%2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1		%2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
ret <2 x float> %2		ret <2 x float> %2

; FAULT-LABEL: f2rsqrt:		; FAULT-LABEL: f2rsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f2rsqrt:		; CHECK-LABEL: f2rsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
		; CHECK-NOT: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}		}

define <4 x float> @f4rsqrt(<4 x float> %a) #0 {		define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
%1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)		%1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
%2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1		%2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
ret <4 x float> %2		ret <4 x float> %2

; FAULT-LABEL: f4rsqrt:		; FAULT-LABEL: f4rsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f4rsqrt:		; CHECK-LABEL: f4rsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
		; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}		}

define <8 x float> @f8rsqrt(<8 x float> %a) #0 {		define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
%1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)		%1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
%2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1		%2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
ret <8 x float> %2		ret <8 x float> %2

; FAULT-LABEL: f8rsqrt:		; FAULT-LABEL: f8rsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: f8rsqrt:		; CHECK-LABEL: f8rsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: frsqrte		; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK: frsqrte		; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
		; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}		}

define double @drsqrt(double %a) #0 {		define double @drsqrt(double %a) #0 {
%1 = tail call fast double @llvm.sqrt.f64(double %a)		%1 = tail call fast double @llvm.sqrt.f64(double %a)
%2 = fdiv fast double 1.000000e+00, %1		%2 = fdiv fast double 1.000000e+00, %1
ret double %2		ret double %2

; FAULT-LABEL: drsqrt:		; FAULT-LABEL: drsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: drsqrt:		; CHECK-LABEL: drsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
		; CHECK-NOT: fcmp d0, #0
}		}

define <2 x double> @d2rsqrt(<2 x double> %a) #0 {		define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
%1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)		%1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
%2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1		%2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
ret <2 x double> %2		ret <2 x double> %2

; FAULT-LABEL: d2rsqrt:		; FAULT-LABEL: d2rsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: d2rsqrt:		; CHECK-LABEL: d2rsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: frsqrte		; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
		; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
		; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}		}

define <4 x double> @d4rsqrt(<4 x double> %a) #0 {		define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
%1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)		%1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
%2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1		%2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
ret <4 x double> %2		ret <4 x double> %2

; FAULT-LABEL: d4rsqrt:		; FAULT-LABEL: d4rsqrt:
; FAULT-NEXT: BB#0		; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt		; FAULT-NEXT: fsqrt

; CHECK-LABEL: d4rsqrt:		; CHECK-LABEL: d4rsqrt:
; CHECK-NEXT: BB#0		; CHECK-NEXT: BB#0
; CHECK-NEXT: fmov		; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: frsqrte		; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK: frsqrte		; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
		; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}		}

attributes #0 = { nounwind "unsafe-fp-math"="true" }		attributes #0 = { nounwind "unsafe-fp-math"="true" }

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Compute the Newton series for reciprocals natively · Closed · Public

Details

Diff Detail

Event Timeline