This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AMDGPU/
-
Target/
-
AMDGPU/
3/4
AMDGPUCodeGenPrepare.cpp
-
test/CodeGen/AMDGPU/
-
CodeGen/
-
AMDGPU/
-
fract-match.ll

Differential D150011

AMDGPU: Pattern match fract instructions in AMDGPUCodeGenPrepare
ClosedPublic

Authored by arsenm on May 5 2023, 5:31 PM.

Download Raw Diff

Details

Reviewers

foad
b-sumner
Pierre-vh

Group Reviewers

Restricted Project

Summary

This will allow eliminating the intrinsic uses in the device
libraries, which will remove a subtarget dependency on the f16
version of the intrinsic.

We previously had some wrong patterns for this under unsafe math
which I've removed.

Do it in IR partially to take advantage of the much better isKnownNeverNaN
handling, and partially out of laziness to avoid repeating this in the DAG
and GlobalISel path. Plus I think this should be done much earlier. Ideally
this would be in InstCombine, but you can't introduce target intrinsics
from a generic instruction rooted pattern.

Diff Detail

Event Timeline

arsenm created this revision.May 5 2023, 5:31 PM

Herald added a project: Restricted Project. · View Herald TranscriptMay 5 2023, 5:31 PM

Herald added subscribers: kosarev, StephenFan, kerbowa and 6 others. · View Herald Transcript

arsenm requested review of this revision.May 5 2023, 5:31 PM

Herald added a project: Restricted Project. · View Herald TranscriptMay 5 2023, 5:31 PM

Herald added a subscriber: wdng. · View Herald Transcript

Harbormaster completed remote builds in B230354: Diff 520009.May 5 2023, 5:31 PM

I just have some minor nits. I can't comment on the correctness of the logic, so I will leave it up to someone with more experience w.r.t. FP ops to approve.

We previously had some wrong patterns for this under unsafe math which I've removed.

Would it be possible to split that into a separate patch? Without that it's a bit difficult to tell if a test changed because of the combine, or the pattern.

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
228	nit: isn't `IRBuilderBase` better?
1423	(optional) small nit: maybe use `!match` and an early return to reduce nesting? Could also do `!Fract` and early return.
1680	nit: use `unsigned` as it's a size type?

arsenm mentioned this in D150203: AMDGPU: Drop broken fast math patterns for fract matching.May 9 2023, 8:26 AM

arsenm updated this revision to Diff 520714.May 9 2023, 8:38 AM

arsenm marked 2 inline comments as done.

arsenm added inline comments.

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
228	Don't see the point of making the type name longer here, it's not going to use any other IRBuilder

Harbormaster completed remote builds in B230883: Diff 520714.May 9 2023, 8:39 AM

I don't have any other comments and I don't see any obvious regressions so LGTM. Though, it might be a good idea to wait for another reviewer with more experience with FP ops to comment before landing.

This revision is now accepted and ready to land.May 17 2023, 2:46 AM

0d0ed9a355ff0d4ca7268084fb3990c402e07641

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPUCodeGenPrepare.cpp

141 lines

test/

CodeGen/

AMDGPU/

fract-match.ll

1248 lines

Diff 520714

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Show All 11 Lines
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AMDGPU.h"		#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"		#include "AMDGPUTargetMachine.h"
#include "SIModeRegisterDefaults.h"		#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/AssumptionCache.h"		#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"		#include "llvm/Analysis/ConstantFolding.h"
		#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"		#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"		#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"		#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"		#include "llvm/IR/IntrinsicsAMDGPU.h"
		#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"		#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"		#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"		#include "llvm/Transforms/Utils/IntegerDivision.h"
		#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"		#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;		using namespace llvm;
		using namespace llvm::PatternMatch;

namespace {		namespace {

static cl::opt<bool> WidenLoads(		static cl::opt<bool> WidenLoads(
"amdgpu-codegenprepare-widen-constant-loads",		"amdgpu-codegenprepare-widen-constant-loads",
cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),		cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,		cl::ReallyHidden,
cl::init(false));		cl::init(false));
Show All 39 Lines	static cl::opt<bool> DisableIDivExpand(
"amdgpu-codegenprepare-disable-idiv-expansion",		"amdgpu-codegenprepare-disable-idiv-expansion",
cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"),		cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,		cl::ReallyHidden,
cl::init(false));		cl::init(false));

class AMDGPUCodeGenPrepare : public FunctionPass,		class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {		public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;		const GCNSubtarget *ST = nullptr;
		const TargetLibraryInfo *TLInfo = nullptr;
AssumptionCache *AC = nullptr;		AssumptionCache *AC = nullptr;
DominatorTree *DT = nullptr;		DominatorTree *DT = nullptr;
UniformityInfo *UA = nullptr;		UniformityInfo *UA = nullptr;
Module *Mod = nullptr;		Module *Mod = nullptr;
const DataLayout *DL = nullptr;		const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;		bool HasUnsafeFPMath = false;
bool HasFP32Denormals = false;		bool HasFP32Denormals = false;

Show All 16 Lines	class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns True if the condition of 'select' operation \p I comes from a		/// \returns True if the condition of 'select' operation \p I comes from a
/// signed 'icmp' operation, false otherwise.		/// signed 'icmp' operation, false otherwise.
bool isSigned(const SelectInst &I) const;		bool isSigned(const SelectInst &I) const;

/// \returns True if type \p T needs to be promoted to 32 bit integer type,		/// \returns True if type \p T needs to be promoted to 32 bit integer type,
/// false otherwise.		/// false otherwise.
bool needsPromotionToI32(const Type *T) const;		bool needsPromotionToI32(const Type *T) const;

		/// Return true if \p T is a legal scalar floating point type.
		bool isLegalFloatingTy(const Type *T) const;

/// Promotes uniform binary operation \p I to equivalent 32 bit binary		/// Promotes uniform binary operation \p I to equivalent 32 bit binary
/// operation.		/// operation.
///		///
/// \details \p I's base element bit width must be greater than 1 and less		/// \details \p I's base element bit width must be greater than 1 and less
/// than or equal 16. Promotion is done by sign or zero extending operands to		/// than or equal 16. Promotion is done by sign or zero extending operands to
/// 32 bits, replacing \p I with equivalent 32 bit binary operation, and		/// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
/// truncating the result of 32 bit binary operation back to \p I's original		/// truncating the result of 32 bit binary operation back to \p I's original
/// type. Division operation is not promoted.		/// type. Division operation is not promoted.
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	class AMDGPUCodeGenPrepare : public FunctionPass,
/// \details \p Widen scalar load for uniform, small type loads from constant		/// \details \p Widen scalar load for uniform, small type loads from constant
// memory / to a full 32-bits and then truncate the input to allow a scalar		// memory / to a full 32-bits and then truncate the input to allow a scalar
// load instead of a vector load.		// load instead of a vector load.
//		//
/// \returns True.		/// \returns True.

bool canWidenScalarExtLoad(LoadInst &I) const;		bool canWidenScalarExtLoad(LoadInst &I) const;

		Value *matchFractPat(IntrinsicInst &I);
		Value *applyFractPat(IRBuilder<> &Builder, Value *FractArg);
		Pierre-vhUnsubmitted Not Done Reply Inline Actions nit: isn't `IRBuilderBase` better? Pierre-vh: nit: isn't `IRBuilderBase` better?
		arsenmAuthorUnsubmitted Done Reply Inline Actions Don't see the point of making the type name longer here, it's not going to use any other IRBuilder arsenm: Don't see the point of making the type name longer here, it's not going to use any other…

public:		public:
static char ID;		static char ID;

AMDGPUCodeGenPrepare() : FunctionPass(ID) {}		AMDGPUCodeGenPrepare() : FunctionPass(ID) {}

bool visitFDiv(BinaryOperator &I);		bool visitFDiv(BinaryOperator &I);
bool visitXor(BinaryOperator &I);		bool visitXor(BinaryOperator &I);

bool visitInstruction(Instruction &I) { return false; }		bool visitInstruction(Instruction &I) { return false; }
bool visitBinaryOperator(BinaryOperator &I);		bool visitBinaryOperator(BinaryOperator &I);
bool visitLoadInst(LoadInst &I);		bool visitLoadInst(LoadInst &I);
bool visitICmpInst(ICmpInst &I);		bool visitICmpInst(ICmpInst &I);
bool visitSelectInst(SelectInst &I);		bool visitSelectInst(SelectInst &I);
bool visitPHINode(PHINode &I);		bool visitPHINode(PHINode &I);

bool visitIntrinsicInst(IntrinsicInst &I);		bool visitIntrinsicInst(IntrinsicInst &I);
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);		bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
		bool visitMinNum(IntrinsicInst &I);

bool doInitialization(Module &M) override;		bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;		bool runOnFunction(Function &F) override;

StringRef getPassName() const override { return "AMDGPU IR optimizations"; }		StringRef getPassName() const override { return "AMDGPU IR optimizations"; }

void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();		AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<UniformityInfoWrapperPass>();		AU.addRequired<UniformityInfoWrapperPass>();
		AU.addRequired<TargetLibraryInfoWrapperPass>();

// FIXME: Division expansion needs to preserve the dominator tree.		// FIXME: Division expansion needs to preserve the dominator tree.
if (!ExpandDiv64InIR)		if (!ExpandDiv64InIR)
AU.setPreservesAll();		AU.setPreservesAll();
}		}
};		};

} // end anonymous namespace		} // end anonymous namespace
Show All 39 Lines	if (ST->hasVOP3PInsts())
return false;		return false;

return needsPromotionToI32(VT->getElementType());		return needsPromotionToI32(VT->getElementType());
}		}

return false;		return false;
}		}

		bool AMDGPUCodeGenPrepare::isLegalFloatingTy(const Type *Ty) const {
		return Ty->isFloatTy() || Ty->isDoubleTy() ||
		(Ty->isHalfTy() && ST->has16BitInsts());
		}

// Return true if the op promoted to i32 should have nsw set.		// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {		static bool promotedOpIsNSW(const Instruction &I) {
switch (I.getOpcode()) {		switch (I.getOpcode()) {
case Instruction::Shl:		case Instruction::Shl:
case Instruction::Add:		case Instruction::Add:
case Instruction::Sub:		case Instruction::Sub:
return true;		return true;
case Instruction::Mul:		case Instruction::Mul:
▲ Show 20 Lines • Show All 1,076 Lines • ▼ Show 20 Lines	bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&		if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
UA->isUniform(&I))		UA->isUniform(&I))
Changed |= promoteUniformOpToI32(I);		Changed |= promoteUniformOpToI32(I);

return Changed;		return Changed;
}		}

bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {		bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
bool Changed = false;		if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
		if (UA->isUniform(&I))
		return promoteUniformOpToI32(I);
		return false;
		}

if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&		Value *Cond = I.getCondition();
UA->isUniform(&I))		Value *TrueVal = I.getTrueValue();
Changed \|= promoteUniformOpToI32(I);		Value *FalseVal = I.getFalseValue();
		Value *CmpVal;
		FCmpInst::Predicate Pred;

return Changed;		// Match fract pattern with nan check.
		if (!match(Cond, m_FCmp(Pred, m_Value(CmpVal), m_NonNaN())))
		Pierre-vhUnsubmitted Done Reply Inline Actions (optional) small nit: maybe use `!match` and an early return to reduce nesting? Could also do `!Fract` and early return. Pierre-vh: (optional) small nit: maybe use `!match` and an early return to reduce nesting? Could also do `!
		return false;

		IRBuilder<> Builder(&I);
		Builder.setFastMathFlags(I.getFastMathFlags());

		auto *IITrue = dyn_cast<IntrinsicInst>(TrueVal);
		auto *IIFalse = dyn_cast<IntrinsicInst>(FalseVal);

		Value *Fract = nullptr;
		if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
		CmpVal == matchFractPat(*IIFalse)) {
		// isnan(x) ? x : fract(x)
		Fract = applyFractPat(Builder, CmpVal);
		} else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
		CmpVal == matchFractPat(*IITrue)) {
		// !isnan(x) ? fract(x) : x
		Fract = applyFractPat(Builder, CmpVal);
		} else
		return false;

		Fract->takeName(&I);
		I.replaceAllUsesWith(Fract);
		RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
		return true;
}		}

// Helper for breaking large PHIs that returns true when an extractelement on V		// Helper for breaking large PHIs that returns true when an extractelement on V
// is likely to be folded away by the DAG combiner.		// is likely to be folded away by the DAG combiner.
static bool isInterestingPHIIncomingValue(Value *V, FixedVectorType *FVT) {		static bool isInterestingPHIIncomingValue(Value *V, FixedVectorType *FVT) {
InsertElementInst *IE = dyn_cast<InsertElementInst>(V);		InsertElementInst *IE = dyn_cast<InsertElementInst>(V);

// Constants & InsertElements chains are interesting.		// Constants & InsertElements chains are interesting.
▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines	bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
I.eraseFromParent();		I.eraseFromParent();
return true;		return true;
}		}

bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {		bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {		switch (I.getIntrinsicID()) {
case Intrinsic::bitreverse:		case Intrinsic::bitreverse:
return visitBitreverseIntrinsicInst(I);		return visitBitreverseIntrinsicInst(I);
		case Intrinsic::minnum:
		return visitMinNum(I);
default:		default:
return false;		return false;
}		}
}		}

bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {		bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;		bool Changed = false;

if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&		if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
UA->isUniform(&I))		UA->isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);		Changed |= promoteUniformBitreverseToI32(I);

return Changed;		return Changed;
}		}

		/// Match non-nan fract pattern.
		/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
		///
		/// Only matches if fract is a useful instruction for the subtarget. Does not
		/// account for the nan handling; the instruction has a nan check on the input
		/// value.
		Value *AMDGPUCodeGenPrepare::matchFractPat(IntrinsicInst &I) {
		if (ST->hasFractBug())
		return nullptr;

		if (I.getIntrinsicID() != Intrinsic::minnum)
		return nullptr;

		Type *Ty = I.getType();
		if (!isLegalFloatingTy(Ty->getScalarType()))
		return nullptr;

		Value *Arg0 = I.getArgOperand(0);
		Value *Arg1 = I.getArgOperand(1);

		const APFloat *C;
		if (!match(Arg1, m_APFloat(C)))
		return nullptr;

		APFloat One(1.0);
		bool LosesInfo;
		One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);

		// Match nextafter(1.0, -1)
		One.next(true);
		if (One != *C)
		return nullptr;

		Value *FloorSrc;
		if (match(Arg0, m_FSub(m_Value(FloorSrc),
		m_Intrinsic<Intrinsic::floor>(m_Deferred(FloorSrc)))))
		return FloorSrc;
		return nullptr;
		}

		Value *AMDGPUCodeGenPrepare::applyFractPat(IRBuilder<> &Builder,
		Value *FractArg) {
		SmallVector<Value *, 4> FractVals;
		extractValues(Builder, FractVals, FractArg);

		SmallVector<Value *, 4> ResultVals(FractVals.size());

		Type *Ty = FractArg->getType()->getScalarType();
		for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
		Pierre-vhUnsubmitted Done Reply Inline Actions nit: use `unsigned` as it's a size type? Pierre-vh: nit: use `unsigned` as it's a size type?
		ResultVals[I] =
		Builder.CreateIntrinsic(Intrinsic::amdgcn_fract, {Ty}, {FractVals[I]});
		}

		return insertValues(Builder, FractArg->getType(), ResultVals);
		}

		bool AMDGPUCodeGenPrepare::visitMinNum(IntrinsicInst &I) {
		Value *FractArg = matchFractPat(I);
		if (!FractArg)
		return false;

		// Match pattern for fract intrinsic in contexts where the nan check has been
		// optimized out (and hope the knowledge the source can't be nan wasn't lost).
		if (!I.hasNoNaNs() && !isKnownNeverNaN(FractArg, TLInfo))
		return false;

		IRBuilder<> Builder(&I);
		FastMathFlags FMF = I.getFastMathFlags();
		FMF.setNoNaNs();
		Builder.setFastMathFlags(FMF);

		Value *Fract = applyFractPat(Builder, FractArg);
		Fract->takeName(&I);
		I.replaceAllUsesWith(Fract);

		RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
		return true;
		}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {		bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;		Mod = &M;
DL = &Mod->getDataLayout();		DL = &Mod->getDataLayout();
return false;		return false;
}		}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {		bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))		if (skipFunction(F))
return false;		return false;

auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();		auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)		if (!TPC)
return false;		return false;

const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();		const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
ST = &TM.getSubtarget<GCNSubtarget>(F);		ST = &TM.getSubtarget<GCNSubtarget>(F);
		TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);		AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();		UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();		auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;		DT = DTWP ? &DTWP->getDomTree() : nullptr;

HasUnsafeFPMath = hasUnsafeFPMath(F);		HasUnsafeFPMath = hasUnsafeFPMath(F);

Show All 25 Lines	bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
}		}

return MadeChange;		return MadeChange;
}		}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,		INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)		"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)		INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
		INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",		INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)		false, false)

char AMDGPUCodeGenPrepare::ID = 0;		char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {		FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
return new AMDGPUCodeGenPrepare();		return new AMDGPUCodeGenPrepare();
}		}

llvm/test/CodeGen/AMDGPU/fract-match.ll

Show All 15 Lines
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s		; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; Test patterns to match v_fract_* instructions.		; Test patterns to match v_fract_* instructions.

; Expansion as it appears in the library with the extra output for		; Expansion as it appears in the library with the extra output for
; floor. We can fold in the nan check into the instruction, but the		; floor. We can fold in the nan check into the instruction, but the
; inf check must remain.		; inf check must remain.
define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {		define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define float @safe_math_fract_f32		; GFX6-IR-LABEL: define float @safe_math_fract_f32
; IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {		; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret float [[COND6]]		; GFX6-IR-NEXT: ret float [[COND6]]
		;
		; IR-FRACT-LABEL: define float @safe_math_fract_f32
		; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
		; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
		; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
		; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
		; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret float [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_f32:		; GFX6-LABEL: safe_math_fract_f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0		; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3		; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4		; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000		; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s8		; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s8
; GFX6-NEXT: s_mov_b32 s7, 0xf000		; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6		; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6		; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_f32:		; GFX7-LABEL: safe_math_fract_f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000		; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc		; GFX7-NEXT: v_fract_f32_e32 v4, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s8		; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
		; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc		; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f32:		; GFX8-LABEL: safe_math_fract_f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000		; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc		; GFX8-NEXT: v_fract_f32_e32 v4, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s4		; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, \|v0\|, s4
		; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc		; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off		; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f32:		; GFX11-LABEL: safe_math_fract_f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0		; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(SKIP_2) \| instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, \|v0\|		; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, \|v0\|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)		; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
		; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
		; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select i1 %uno, float %x, float %min		%cond = select i1 %uno, float %x, float %min
%fabs = tail call float @llvm.fabs.f32(float %x)		%fabs = tail call float @llvm.fabs.f32(float %x)
%cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000		%cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
%cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond		%cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
store float %floor, ptr addrspace(1) %ip, align 4		store float %floor, ptr addrspace(1) %ip, align 4
ret float %cond6		ret float %cond6
}		}

define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) nocapture writeonly %ip) {		define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define float @safe_math_fract_f32_noinf_check		; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check
; IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret float [[COND]]		; GFX6-IR-NEXT: ret float [[COND]]
		;
		; IR-FRACT-LABEL: define float @safe_math_fract_f32_noinf_check
		; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
		; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret float [[COND]]
;		;
; GFX6-LABEL: safe_math_fract_f32_noinf_check:		; GFX6-LABEL: safe_math_fract_f32_noinf_check:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0		; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3		; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4		; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s7, 0xf000		; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6		; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6		; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_f32_noinf_check:		; GFX7-LABEL: safe_math_fract_f32_noinf_check:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc		; GFX7-NEXT: v_floor_f32_e32 v3, v0
		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f32_noinf_check:		; GFX8-LABEL: safe_math_fract_f32_noinf_check:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0		; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off		; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f32_noinf_check:		; GFX11-LABEL: safe_math_fract_f32_noinf_check:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0		; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX11-NEXT: global_store_b32 v[1:2], v3, off		; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select i1 %uno, float %x, float %min		%cond = select i1 %uno, float %x, float %min
store float %floor, ptr addrspace(1) %ip, align 4		store float %floor, ptr addrspace(1) %ip, align 4
ret float %cond		ret float %cond
}		}

; Cannot match fract without a nan check or no-nans.		; Cannot match fract without a nan check or no-nans.
define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {		define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define float @no_nan_check_math_fract_f32		; IR-LABEL: define float @no_nan_check_math_fract_f32
; IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; IR-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: entry:		; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])		; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000		; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[MIN]]		; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[MIN]]
; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines	entry:
%fabs = tail call float @llvm.fabs.f32(float %x)		%fabs = tail call float @llvm.fabs.f32(float %x)
%cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000		%cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
%cond6 = select i1 %cmpinf, float 0.000000e+00, float %min		%cond6 = select i1 %cmpinf, float 0.000000e+00, float %min
store float %floor, ptr addrspace(1) %ip, align 4		store float %floor, ptr addrspace(1) %ip, align 4
ret float %cond6		ret float %cond6
}		}

define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {		define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {
; IR-LABEL: define float @basic_fract_f32_nonans		; GFX6-IR-LABEL: define float @basic_fract_f32_nonans
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]		; GFX6-IR-NEXT: ret float [[MIN]]
		;
		; IR-FRACT-LABEL: define float @basic_fract_f32_nonans
		; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[MIN]]
;		;
; GFX6-LABEL: basic_fract_f32_nonans:		; GFX6-LABEL: basic_fract_f32_nonans:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_f32_nonans:		; GFX7-LABEL: basic_fract_f32_nonans:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_f32_nonans:		; GFX8-LABEL: basic_fract_f32_nonans:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_f32_nonans:		; GFX11-LABEL: basic_fract_f32_nonans:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {		define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {
; IR-LABEL: define float @basic_fract_f32_flags_fsub		; GFX6-IR-LABEL: define float @basic_fract_f32_flags_fsub
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub nsz float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub nsz float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]		; GFX6-IR-NEXT: ret float [[MIN]]
		;
		; IR-FRACT-LABEL: define float @basic_fract_f32_flags_fsub
		; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[MIN]]
;		;
; GFX6-LABEL: basic_fract_f32_flags_fsub:		; GFX6-LABEL: basic_fract_f32_flags_fsub:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_f32_flags_fsub:		; GFX7-LABEL: basic_fract_f32_flags_fsub:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_f32_flags_fsub:		; GFX8-LABEL: basic_fract_f32_flags_fsub:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_f32_flags_fsub:		; GFX11-LABEL: basic_fract_f32_flags_fsub:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub nsz float %x, %floor		%sub = fsub nsz float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {		define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {
; IR-LABEL: define <2 x float> @basic_fract_v2f32_nonans		; GFX6-IR-LABEL: define <2 x float> @basic_fract_v2f32_nonans
; IR-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
; IR-NEXT: ret <2 x float> [[MIN]]		; GFX6-IR-NEXT: ret <2 x float> [[MIN]]
		;
		; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans
		; IR-FRACT-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
		; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
		; IR-FRACT-NEXT: [[TMP2:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP0]])
		; IR-FRACT-NEXT: [[TMP3:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP1]])
		; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
		; IR-FRACT-NEXT: [[MIN:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
		; IR-FRACT-NEXT: ret <2 x float> [[MIN]]
;		;
; GFX6-LABEL: basic_fract_v2f32_nonans:		; GFX6-LABEL: basic_fract_v2f32_nonans:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v2, v0		; GFX6-NEXT: v_floor_f32_e32 v2, v0
; GFX6-NEXT: v_floor_f32_e32 v3, v1		; GFX6-NEXT: v_floor_f32_e32 v3, v1
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3		; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1		; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_v2f32_nonans:		; GFX7-LABEL: basic_fract_v2f32_nonans:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v2, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_floor_f32_e32 v3, v1		; GFX7-NEXT: v_fract_f32_e32 v1, v1
; GFX7-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_v2f32_nonans:		; GFX8-LABEL: basic_fract_v2f32_nonans:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v2, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_floor_f32_e32 v3, v1		; GFX8-NEXT: v_fract_f32_e32 v1, v1
; GFX8-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_v2f32_nonans:		; GFX11-LABEL: basic_fract_v2f32_nonans:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v2, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_floor_f32_e32 v3, v1		; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT: v_dual_min_f32 v0, 0x3f7fffff, v0 :: v_dual_min_f32 v1, 0x3f7fffff, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)		%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub = fsub <2 x float> %x, %floor		%sub = fsub <2 x float> %x, %floor
%min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)		%min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
ret <2 x float> %min		ret <2 x float> %min
}		}

define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr addrspace(1) %ptr) {		define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr addrspace(1) %ptr) {
; IR-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans		; GFX6-IR-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
; IR-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4		; GFX6-IR-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
; IR-NEXT: ret float [[MIN]]		; GFX6-IR-NEXT: ret float [[MIN]]
		;
		; IR-FRACT-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
		; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
		; IR-FRACT-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
		; IR-FRACT-NEXT: ret float [[MIN]]
;		;
; GFX6-LABEL: basic_fract_f32_multi_use_fsub_nonans:		; GFX6-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0		; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_sub_f32_e32 v3, v0, v3		; GFX6-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX6-NEXT: s_mov_b32 s7, 0xf000		; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6		; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6		; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_f32_multi_use_fsub_nonans:		; GFX7-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_sub_f32_e32 v3, v0, v3		; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3		; GFX7-NEXT: v_sub_f32_e32 v3, v0, v3
		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_f32_multi_use_fsub_nonans:		; GFX8-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0		; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v3, v0, v3		; GFX8-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: global_store_dword v[1:2], v3, off		; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:		; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v3, v0		; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3		; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off		; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
store float %sub, ptr addrspace(1) %ptr		store float %sub, ptr addrspace(1) %ptr
ret float %min		ret float %min
}		}

define float @nnan_minnum_fract_f32(float %x) {		define float @nnan_minnum_fract_f32(float %x) {
; IR-LABEL: define float @nnan_minnum_fract_f32		; GFX6-IR-LABEL: define float @nnan_minnum_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]		; GFX6-IR-NEXT: ret float [[MIN]]
		;
		; IR-FRACT-LABEL: define float @nnan_minnum_fract_f32
		; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[MIN]]
;		;
; GFX6-LABEL: nnan_minnum_fract_f32:		; GFX6-LABEL: nnan_minnum_fract_f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: nnan_minnum_fract_f32:		; GFX7-LABEL: nnan_minnum_fract_f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: nnan_minnum_fract_f32:		; GFX8-LABEL: nnan_minnum_fract_f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: nnan_minnum_fract_f32:		; GFX11-LABEL: nnan_minnum_fract_f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call nnan float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call nnan float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

		; TODO: Could match if we checked isKnownNeverNaN on the minnum src
		; instead of the pattern input source.
define float @nnan_fsub_fract_f32(float %x) {		define float @nnan_fsub_fract_f32(float %x) {
; IR-LABEL: define float @nnan_fsub_fract_f32		; IR-LABEL: define float @nnan_fsub_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub nnan float [[X]], [[FLOOR]]		; IR-NEXT: [[SUB:%.*]] = fsub nnan float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]		; IR-NEXT: ret float [[MIN]]
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines
entry:		entry:
%floor = tail call nnan float @llvm.floor.f32(float %x)		%floor = tail call nnan float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

define float @nnan_src_fract_f32(float nofpclass(nan) %x) {		define float @nnan_src_fract_f32(float nofpclass(nan) %x) {
; IR-LABEL: define float @nnan_src_fract_f32		; GFX6-IR-LABEL: define float @nnan_src_fract_f32
; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]		; GFX6-IR-NEXT: ret float [[MIN]]
		;
		; IR-FRACT-LABEL: define float @nnan_src_fract_f32
		; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[MIN]]
;		;
; GFX6-LABEL: nnan_src_fract_f32:		; GFX6-LABEL: nnan_src_fract_f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: nnan_src_fract_f32:		; GFX7-LABEL: nnan_src_fract_f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: nnan_src_fract_f32:		; GFX8-LABEL: nnan_src_fract_f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: nnan_src_fract_f32:		; GFX11-LABEL: nnan_src_fract_f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
ret float %min		ret float %min
}		}

▲ Show 20 Lines • Show All 284 Lines • ▼ Show 20 Lines	entry:
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0x7FF8000000000000		%uno = fcmp uno float %x, 0x7FF8000000000000
%cond = select i1 %uno, float %x, float %min		%cond = select i1 %uno, float %x, float %min
ret float %cond		ret float %cond
}		}

; No inf check		; No inf check
define float @select_nan_fract_f32(float %x) {		define float @select_nan_fract_f32(float %x) {
; IR-LABEL: define float @select_nan_fract_f32		; GFX6-IR-LABEL: define float @select_nan_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: ret float [[COND]]		; GFX6-IR-NEXT: ret float [[COND]]
		;
		; IR-FRACT-LABEL: define float @select_nan_fract_f32
		; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[COND]]
;		;
; GFX6-LABEL: select_nan_fract_f32:		; GFX6-LABEL: select_nan_fract_f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1		; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: select_nan_fract_f32:		; GFX7-LABEL: select_nan_fract_f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: select_nan_fract_f32:		; GFX8-LABEL: select_nan_fract_f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: select_nan_fract_f32:		; GFX11-LABEL: select_nan_fract_f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select i1 %uno, float %x, float %min		%cond = select i1 %uno, float %x, float %min
ret float %cond		ret float %cond
}		}

define float @commuted_select_nan_fract_f32(float %x) {		define float @commuted_select_nan_fract_f32(float %x) {
; IR-LABEL: define float @commuted_select_nan_fract_f32		; GFX6-IR-LABEL: define float @commuted_select_nan_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
; IR-NEXT: ret float [[COND]]		; GFX6-IR-NEXT: ret float [[COND]]
		;
		; IR-FRACT-LABEL: define float @commuted_select_nan_fract_f32
		; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[COND]]
;		;
; GFX6-LABEL: commuted_select_nan_fract_f32:		; GFX6-LABEL: commuted_select_nan_fract_f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1		; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: commuted_select_nan_fract_f32:		; GFX7-LABEL: commuted_select_nan_fract_f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: commuted_select_nan_fract_f32:		; GFX8-LABEL: commuted_select_nan_fract_f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: commuted_select_nan_fract_f32:		; GFX11-LABEL: commuted_select_nan_fract_f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp ord float %x, 0.000000e+00		%uno = fcmp ord float %x, 0.000000e+00
%cond = select i1 %uno, float %min, float %x		%cond = select i1 %uno, float %min, float %x
ret float %cond		ret float %cond
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	entry:
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select i1 %uno, float %min, float %x		%cond = select i1 %uno, float %min, float %x
ret float %cond		ret float %cond
}		}

define half @basic_fract_f16_nonan(half nofpclass(nan) %x) {		define half @basic_fract_f16_nonan(half nofpclass(nan) %x) {
; IR-LABEL: define half @basic_fract_f16_nonan		; GFX6-IR-LABEL: define half @basic_fract_f16_nonan
; IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
; IR-NEXT: ret half [[MIN]]		; GFX6-IR-NEXT: ret half [[MIN]]
		;
		; GFX7-IR-LABEL: define half @basic_fract_f16_nonan
		; GFX7-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; GFX7-IR-NEXT: entry:
		; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
		; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
		; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
		; GFX7-IR-NEXT: ret half [[MIN]]
		;
		; IR-LEGALF16-LABEL: define half @basic_fract_f16_nonan
		; IR-LEGALF16-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-LEGALF16-NEXT: entry:
		; IR-LEGALF16-NEXT: [[MIN:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[X]])
		; IR-LEGALF16-NEXT: ret half [[MIN]]
;		;
; GFX6-LABEL: basic_fract_f16_nonan:		; GFX6-LABEL: basic_fract_f16_nonan:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0		; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0		; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_f16_nonan:		; GFX7-LABEL: basic_fract_f16_nonan:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0		; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1		; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0		; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_f16_nonan:		; GFX8-LABEL: basic_fract_f16_nonan:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f16_e32 v1, v0		; GFX8-NEXT: v_fract_f16_e32 v0, v0
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT: v_min_f16_e32 v0, 0x3bff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_f16_nonan:		; GFX11-LABEL: basic_fract_f16_nonan:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f16_e32 v1, v0		; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX11-NEXT: v_min_f16_e32 v0, 0x3bff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call half @llvm.floor.f16(half %x)		%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor		%sub = fsub half %x, %floor
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)		%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
ret half %min		ret half %min
}		}

define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) {		define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) {
; IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan		; GFX6-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
; IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
; IR-NEXT: ret <2 x half> [[MIN]]		; GFX6-IR-NEXT: ret <2 x half> [[MIN]]
		;
		; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan
		; GFX7-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; GFX7-IR-NEXT: entry:
		; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
		; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
		; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
		; GFX7-IR-NEXT: ret <2 x half> [[MIN]]
		;
		; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan
		; IR-LEGALF16-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-LEGALF16-NEXT: entry:
		; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
		; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
		; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP0]])
		; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP1]])
		; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
		; IR-LEGALF16-NEXT: [[MIN:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
		; IR-LEGALF16-NEXT: ret <2 x half> [[MIN]]
;		;
; GFX6-LABEL: basic_fract_v2f16_nonan:		; GFX6-LABEL: basic_fract_v2f16_nonan:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1		; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0		; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1		; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
Show All 18 Lines
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2		; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0		; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1		; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_v2f16_nonan:		; GFX8-LABEL: basic_fract_v2f16_nonan:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1		; GFX8-NEXT: v_fract_f16_e32 v1, v0
; GFX8-NEXT: v_floor_f16_e32 v2, v0		; GFX8-NEXT: v_fract_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_pack_b32_f16 v1, v2, v1		; GFX8-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX8-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX8-NEXT: s_movk_i32 s4, 0x3bff
; GFX8-NEXT: v_pk_min_f16 v0, v0, s4 op_sel_hi:[1,0]
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_v2f16_nonan:		; GFX11-LABEL: basic_fract_v2f16_nonan:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0		; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_floor_f16_e32 v2, v0		; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_floor_f16_e32 v1, v1		; GFX11-NEXT: v_fract_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v1, v2, v1		; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT: v_pk_min_f16 v0, 0x3bff, v0 op_sel_hi:[0,1]
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)		%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
%sub = fsub <2 x half> %x, %floor		%sub = fsub <2 x half> %x, %floor
%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)		%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
ret <2 x half> %min		ret <2 x half> %min
}		}

define double @basic_fract_f64_nonans(double nofpclass(nan) %x) {		define double @basic_fract_f64_nanans(double nofpclass(nan) %x) {
; IR-LABEL: define double @basic_fract_f64_nonans		; GFX6-IR-LABEL: define double @basic_fract_f64_nanans
; IR-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
; IR-NEXT: ret double [[MIN]]		; GFX6-IR-NEXT: ret double [[MIN]]
		;
		; IR-FRACT-LABEL: define double @basic_fract_f64_nanans
		; IR-FRACT-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan double @llvm.amdgcn.fract.f64(double [[X]])
		; IR-FRACT-NEXT: ret double [[MIN]]
;		;
; GFX6-LABEL: basic_fract_f64_nonans:		; GFX6-LABEL: basic_fract_f64_nanans:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]		; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1		; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff		; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]		; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3		; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3
; GFX6-NEXT: s_mov_b32 s4, -1		; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3]		; GFX6-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3]
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff		; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]		; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]		; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: basic_fract_f64_nonans:		; GFX7-LABEL: basic_fract_f64_nanans:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f64_e32 v[2:3], v[0:1]		; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT: s_mov_b32 s4, -1
; GFX7-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX7-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: basic_fract_f64_nonans:		; GFX8-LABEL: basic_fract_f64_nanans:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f64_e32 v[2:3], v[0:1]		; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT: s_mov_b32 s4, -1
; GFX8-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX8-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: basic_fract_f64_nonans:		; GFX11-LABEL: basic_fract_f64_nanans:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[0:1]		; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: s_mov_b32 s0, -1
; GFX11-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call double @llvm.floor.f64(double %x)		%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor		%sub = fsub double %x, %floor
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)		%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
ret double %min		ret double %min
}		}

define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) nocapture writeonly %ip) {		define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define half @safe_math_fract_f16_noinf_check		; GFX6-IR-LABEL: define half @safe_math_fract_f16_noinf_check
; IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
; IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
; IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret half [[COND]]		; GFX6-IR-NEXT: ret half [[COND]]
		;
		; GFX7-IR-LABEL: define half @safe_math_fract_f16_noinf_check
		; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; GFX7-IR-NEXT: entry:
		; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
		; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
		; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
		; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
		; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
		; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; GFX7-IR-NEXT: ret half [[COND]]
		;
		; IR-LEGALF16-LABEL: define half @safe_math_fract_f16_noinf_check
		; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-LEGALF16-NEXT: entry:
		; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
		; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
		; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-LEGALF16-NEXT: ret half [[COND]]
;		;
; GFX6-LABEL: safe_math_fract_f16_noinf_check:		; GFX6-LABEL: safe_math_fract_f16_noinf_check:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_mov_b32 s7, 0xf000		; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6		; GFX6-NEXT: s_mov_b32 s4, s6
Show All 27 Lines
; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f16_noinf_check:		; GFX8-LABEL: safe_math_fract_f16_noinf_check:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f16_e32 v3, v0		; GFX8-NEXT: v_floor_f16_e32 v3, v0
; GFX8-NEXT: v_sub_f16_e32 v4, v0, v3		; GFX8-NEXT: v_fract_f16_e32 v0, v0
; GFX8-NEXT: v_min_f16_e32 v4, 0x3bff, v4
; GFX8-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-NEXT: global_store_short v[1:2], v3, off		; GFX8-NEXT: global_store_short v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f16_noinf_check:		; GFX11-LABEL: safe_math_fract_f16_noinf_check:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f16_e32 v3, v0		; GFX11-NEXT: v_floor_f16_e32 v3, v0
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0		; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f16_e32 v4, v0, v3
; GFX11-NEXT: global_store_b16 v[1:2], v3, off		; GFX11-NEXT: global_store_b16 v[1:2], v3, off
; GFX11-NEXT: v_min_f16_e32 v4, 0x3bff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call half @llvm.floor.f16(half %x)		%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor		%sub = fsub half %x, %floor
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)		%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
%uno = fcmp uno half %x, 0.000000e+00		%uno = fcmp uno half %x, 0.000000e+00
%cond = select i1 %uno, half %x, half %min		%cond = select i1 %uno, half %x, half %min
store half %floor, ptr addrspace(1) %ip, align 4		store half %floor, ptr addrspace(1) %ip, align 4
ret half %cond		ret half %cond
}		}

define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) nocapture writeonly %ip) {		define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define double @safe_math_fract_f64_noinf_check		; GFX6-IR-LABEL: define double @safe_math_fract_f64_noinf_check
; IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
; IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
; IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret double [[COND]]		; GFX6-IR-NEXT: ret double [[COND]]
		;
		; IR-FRACT-LABEL: define double @safe_math_fract_f64_noinf_check
		; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
		; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
		; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret double [[COND]]
;		;
; GFX6-LABEL: safe_math_fract_f64_noinf_check:		; GFX6-LABEL: safe_math_fract_f64_noinf_check:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]		; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v6, -1		; GFX6-NEXT: v_mov_b32_e32 v6, -1
; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff		; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]		; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
Show All 15 Lines
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_f64_noinf_check:		; GFX7-LABEL: safe_math_fract_f64_noinf_check:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]		; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX7-NEXT: s_mov_b32 s4, -1		; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], s[4:5]
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f64_noinf_check:		; GFX8-LABEL: safe_math_fract_f64_noinf_check:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]		; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX8-NEXT: s_mov_b32 s4, -1		; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX8-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off		; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f64_noinf_check:		; GFX11-LABEL: safe_math_fract_f64_noinf_check:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]		; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: s_mov_b32 s0, -1		; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off		; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], s[0:1]
; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v0 :: v_dual_cndmask_b32 v1, v7, v1
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call double @llvm.floor.f64(double %x)		%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor		%sub = fsub double %x, %floor
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)		%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
%uno = fcmp uno double %x, 0.000000e+00		%uno = fcmp uno double %x, 0.000000e+00
%cond = select i1 %uno, double %x, double %min		%cond = select i1 %uno, double %x, double %min
store double %floor, ptr addrspace(1) %ip, align 4		store double %floor, ptr addrspace(1) %ip, align 4
ret double %cond		ret double %cond
}		}

define float @select_nan_fract_f32_flags_select(float %x) {		define float @select_nan_fract_f32_flags_select(float %x) {
; IR-LABEL: define float @select_nan_fract_f32_flags_select		; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_select
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select nsz i1 [[UNO]], float [[X]], float [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select nsz i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: ret float [[COND]]		; GFX6-IR-NEXT: ret float [[COND]]
		;
		; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_select
		; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[COND:%.*]] = call nsz float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[COND]]
;		;
; GFX6-LABEL: select_nan_fract_f32_flags_select:		; GFX6-LABEL: select_nan_fract_f32_flags_select:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1		; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: select_nan_fract_f32_flags_select:		; GFX7-LABEL: select_nan_fract_f32_flags_select:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: select_nan_fract_f32_flags_select:		; GFX8-LABEL: select_nan_fract_f32_flags_select:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: select_nan_fract_f32_flags_select:		; GFX11-LABEL: select_nan_fract_f32_flags_select:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select nsz i1 %uno, float %x, float %min		%cond = select nsz i1 %uno, float %x, float %min
ret float %cond		ret float %cond
}		}

define float @select_nan_fract_f32_flags_minnum(float %x) {		define float @select_nan_fract_f32_flags_minnum(float %x) {
; IR-LABEL: define float @select_nan_fract_f32_flags_minnum		; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_minnum
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; IR-NEXT: ret float [[COND]]		; GFX6-IR-NEXT: ret float [[COND]]
		;
		; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_minnum
		; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
		; IR-FRACT-NEXT: ret float [[COND]]
;		;
; GFX6-LABEL: select_nan_fract_f32_flags_minnum:		; GFX6-LABEL: select_nan_fract_f32_flags_minnum:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0		; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1		; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1		; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0		; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: select_nan_fract_f32_flags_minnum:		; GFX7-LABEL: select_nan_fract_f32_flags_minnum:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0		; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: select_nan_fract_f32_flags_minnum:		; GFX8-LABEL: select_nan_fract_f32_flags_minnum:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0		; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: select_nan_fract_f32_flags_minnum:		; GFX11-LABEL: select_nan_fract_f32_flags_minnum:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f32_e32 v1, v0		; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1
; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call float @llvm.floor.f32(float %x)		%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor		%sub = fsub float %x, %floor
%min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)		%min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
%uno = fcmp uno float %x, 0.000000e+00		%uno = fcmp uno float %x, 0.000000e+00
%cond = select i1 %uno, float %x, float %min		%cond = select i1 %uno, float %x, float %min
ret float %cond		ret float %cond
}		}

define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %ip) {		define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define <2 x float> @safe_math_fract_v2f32		; GFX6-IR-LABEL: define <2 x float> @safe_math_fract_v2f32
; IR-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
; IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer
; IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], <float 0x7FF0000000000000, float 0x7FF0000000000000>		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
; IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
; IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret <2 x float> [[COND6]]		; GFX6-IR-NEXT: ret <2 x float> [[COND6]]
		;
		; IR-FRACT-LABEL: define <2 x float> @safe_math_fract_v2f32
		; IR-FRACT-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
		; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
		; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
		; IR-FRACT-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP0]])
		; IR-FRACT-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP1]])
		; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
		; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
		; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
		; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
		; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]]
		; IR-FRACT-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret <2 x float> [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_v2f32:		; GFX6-LABEL: safe_math_fract_v2f32:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v5, v1		; GFX6-NEXT: v_floor_f32_e32 v5, v1
; GFX6-NEXT: v_floor_f32_e32 v4, v0		; GFX6-NEXT: v_floor_f32_e32 v4, v0
; GFX6-NEXT: v_sub_f32_e32 v6, v1, v5		; GFX6-NEXT: v_sub_f32_e32 v6, v1, v5
; GFX6-NEXT: v_sub_f32_e32 v7, v0, v4		; GFX6-NEXT: v_sub_f32_e32 v7, v0, v4
Show All 14 Lines
; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9]		; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9]
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_v2f32:		; GFX7-LABEL: safe_math_fract_v2f32:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v5, v1		; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: v_floor_f32_e32 v4, v0		; GFX7-NEXT: v_fract_f32_e32 v6, v0
; GFX7-NEXT: v_sub_f32_e32 v6, v1, v5		; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX7-NEXT: v_sub_f32_e32 v7, v0, v4
; GFX7-NEXT: v_min_f32_e32 v6, 0x3f7fffff, v6
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
; GFX7-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7
; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX7-NEXT: s_movk_i32 s10, 0x204
; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
; GFX7-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9]		; GFX7-NEXT: v_floor_f32_e32 v4, v0
; GFX7-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10		; GFX7-NEXT: v_fract_f32_e32 v7, v1
		; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
		; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9]		; GFX7-NEXT: v_floor_f32_e32 v5, v1
		; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_v2f32:		; GFX8-LABEL: safe_math_fract_v2f32:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v5, v1		; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
		; GFX8-NEXT: v_fract_f32_e32 v6, v0
		; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_floor_f32_e32 v4, v0		; GFX8-NEXT: v_floor_f32_e32 v4, v0
; GFX8-NEXT: v_sub_f32_e32 v6, v1, v5		; GFX8-NEXT: v_fract_f32_e32 v7, v1
; GFX8-NEXT: v_sub_f32_e32 v7, v0, v4		; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX8-NEXT: v_min_f32_e32 v6, 0x3f7fffff, v6		; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v1, v1		; GFX8-NEXT: v_floor_f32_e32 v5, v1
; GFX8-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7		; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX8-NEXT: s_movk_i32 s6, 0x204
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
; GFX8-NEXT: v_cmp_class_f32_e64 s[4:5], v0, s6
; GFX8-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[4:5]
; GFX8-NEXT: v_cmp_class_f32_e64 s[4:5], v1, s6
; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[4:5]
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off		; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_v2f32:		; GFX11-LABEL: safe_math_fract_v2f32:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
		; GFX11-NEXT: v_fract_f32_e32 v6, v0
		; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
		; GFX11-NEXT: v_fract_f32_e32 v7, v1
; GFX11-NEXT: v_floor_f32_e32 v4, v0		; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: v_floor_f32_e32 v5, v1		; GFX11-NEXT: v_floor_f32_e32 v5, v1
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0		; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204		; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_sub_f32 v6, v0, v4 :: v_dual_sub_f32 v7, v1, v5
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off		; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: v_dual_min_f32 v6, 0x3f7fffff, v6 :: v_dual_min_f32 v7, 0x3f7fffff, v7		; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
; GFX11-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)		%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub = fsub <2 x float> %x, %floor		%sub = fsub <2 x float> %x, %floor
%min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)		%min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
%uno = fcmp uno <2 x float> %x, zeroinitializer		%uno = fcmp uno <2 x float> %x, zeroinitializer
%cond = select <2 x i1> %uno, <2 x float> %x, <2 x float> %min		%cond = select <2 x i1> %uno, <2 x float> %x, <2 x float> %min
%fabs = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)		%fabs = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
%cmpinf = fcmp oeq <2 x float> %fabs, <float 0x7FF0000000000000, float 0x7FF0000000000000>		%cmpinf = fcmp oeq <2 x float> %fabs, <float 0x7FF0000000000000, float 0x7FF0000000000000>
%cond6 = select <2 x i1> %cmpinf, <2 x float> zeroinitializer, <2 x float> %cond		%cond6 = select <2 x i1> %cmpinf, <2 x float> zeroinitializer, <2 x float> %cond
store <2 x float> %floor, ptr addrspace(1) %ip, align 4		store <2 x float> %floor, ptr addrspace(1) %ip, align 4
ret <2 x float> %cond6		ret <2 x float> %cond6
}		}

define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeonly %ip) {		define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define double @safe_math_fract_f64		; GFX6-IR-LABEL: define double @safe_math_fract_f64
; IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF)
; IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
; IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret double [[COND6]]		; GFX6-IR-NEXT: ret double [[COND6]]
		;
		; IR-FRACT-LABEL: define double @safe_math_fract_f64
		; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]])
		; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]])
		; IR-FRACT-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
		; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000
		; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]]
		; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret double [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_f64:		; GFX6-LABEL: safe_math_fract_f64:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]		; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v6, -1		; GFX6-NEXT: v_mov_b32_e32 v6, -1
; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff		; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]		; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
Show All 19 Lines
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc		; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_f64:		; GFX7-LABEL: safe_math_fract_f64:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX7-NEXT: s_mov_b32 s4, -1
; GFX7-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], s[4:5]
; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000		; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc		; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX7-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, \|v[0:1]\|, s[4:5]		; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, \|v[0:1]\|, s[4:5]
		; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64		; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc		; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc		; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f64:		; GFX8-LABEL: safe_math_fract_f64:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX8-NEXT: s_mov_b32 s4, -1
; GFX8-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX8-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], s[4:5]
; GFX8-NEXT: s_mov_b32 s4, 0		; GFX8-NEXT: s_mov_b32 s4, 0
; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000		; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc		; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, \|v[0:1]\|, s[4:5]		; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, \|v[0:1]\|, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc		; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc		; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
		; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
		; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f64:		; GFX11-LABEL: safe_math_fract_f64:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: s_mov_b32 s0, -1
; GFX11-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(SKIP_4) \| instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s0, 0		; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000		; GFX11-NEXT: s_mov_b32 s1, 0x7ff00000
; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v1 :: v_dual_cndmask_b32 v6, v6, v0		; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, \|v[0:1]\|, s[0:1]		; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, \|v[0:1]\|, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)		; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v6 :: v_dual_cndmask_b32 v1, 0, v7		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
		; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
		; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call double @llvm.floor.f64(double %x)		%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor		%sub = fsub double %x, %floor
%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)		%min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF)
%uno = fcmp uno double %x, 0.000000e+00		%uno = fcmp uno double %x, 0.000000e+00
%cond = select i1 %uno, double %x, double %min		%cond = select i1 %uno, double %x, double %min
%fabs = tail call double @llvm.fabs.f64(double %x)		%fabs = tail call double @llvm.fabs.f64(double %x)
%cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000		%cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000
%cond6 = select i1 %cmpinf, double 0.000000e+00, double %cond		%cond6 = select i1 %cmpinf, double 0.000000e+00, double %cond
store double %floor, ptr addrspace(1) %ip, align 4		store double %floor, ptr addrspace(1) %ip, align 4
ret double %cond6		ret double %cond6
}		}

define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %ip) {		define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define half @safe_math_fract_f16		; GFX6-IR-LABEL: define half @safe_math_fract_f16
; IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
; IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
; IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret half [[COND6]]		; GFX6-IR-NEXT: ret half [[COND6]]
		;
		; GFX7-IR-LABEL: define half @safe_math_fract_f16
		; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; GFX7-IR-NEXT: entry:
		; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
		; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]]
		; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF)
		; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000
		; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]]
		; GFX7-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
		; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
		; GFX7-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
		; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; GFX7-IR-NEXT: ret half [[COND6]]
		;
		; IR-LEGALF16-LABEL: define half @safe_math_fract_f16
		; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-LEGALF16-NEXT: entry:
		; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]])
		; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]])
		; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]])
		; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00
		; IR-LEGALF16-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]]
		; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-LEGALF16-NEXT: ret half [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_f16:		; GFX6-LABEL: safe_math_fract_f16:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000		; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_mov_b32 s7, 0xf000		; GFX6-NEXT: s_mov_b32 s7, 0xf000
Show All 32 Lines
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc		; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_f16:		; GFX8-LABEL: safe_math_fract_f16:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f16_e32 v3, v0
; GFX8-NEXT: v_sub_f16_e32 v4, v0, v3
; GFX8-NEXT: v_min_f16_e32 v4, 0x3bff, v4
; GFX8-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
; GFX8-NEXT: s_movk_i32 s4, 0x7c00		; GFX8-NEXT: s_movk_i32 s4, 0x7c00
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc		; GFX8-NEXT: v_fract_f16_e32 v4, v0
; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, \|v0\|, s4		; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, \|v0\|, s4
		; GFX8-NEXT: v_floor_f16_e32 v3, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc		; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_short v[1:2], v3, off		; GFX8-NEXT: global_store_short v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_f16:		; GFX11-LABEL: safe_math_fract_f16:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_floor_f16_e32 v3, v0		; GFX11-NEXT: v_fract_f16_e32 v3, v0
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(SKIP_2) \| instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f16_e32 v4, v0, v3
; GFX11-NEXT: global_store_b16 v[1:2], v3, off
; GFX11-NEXT: v_min_f16_e32 v4, 0x3bff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, \|v0\|		; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, \|v0\|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)		; GFX11-NEXT: v_floor_f16_e32 v4, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
		; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
		; GFX11-NEXT: global_store_b16 v[1:2], v4, off
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call half @llvm.floor.f16(half %x)		%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor		%sub = fsub half %x, %floor
%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)		%min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF)
%uno = fcmp uno half %x, 0.000000e+00		%uno = fcmp uno half %x, 0.000000e+00
%cond = select i1 %uno, half %x, half %min		%cond = select i1 %uno, half %x, half %min
%fabs = tail call half @llvm.fabs.f16(half %x)		%fabs = tail call half @llvm.fabs.f16(half %x)
%cmpinf = fcmp oeq half %fabs, 0xH7C00		%cmpinf = fcmp oeq half %fabs, 0xH7C00
%cond6 = select i1 %cmpinf, half 0.000000e+00, half %cond		%cond6 = select i1 %cmpinf, half 0.000000e+00, half %cond
store half %floor, ptr addrspace(1) %ip, align 4		store half %floor, ptr addrspace(1) %ip, align 4
ret half %cond6		ret half %cond6
}		}

define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %ip) {		define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define <2 x half> @safe_math_fract_v2f16		; GFX6-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
; IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
; IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
; IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
; IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
; IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret <2 x half> [[COND6]]		; GFX6-IR-NEXT: ret <2 x half> [[COND6]]
		;
		; GFX7-IR-LABEL: define <2 x half> @safe_math_fract_v2f16
		; GFX7-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; GFX7-IR-NEXT: entry:
		; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
		; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]]
		; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> <half 0xH3BFF, half 0xH3BFF>)
		; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer
		; GFX7-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]]
		; GFX7-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
		; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
		; GFX7-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
		; GFX7-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; GFX7-IR-NEXT: ret <2 x half> [[COND6]]
		;
		; IR-LEGALF16-LABEL: define <2 x half> @safe_math_fract_v2f16
		; IR-LEGALF16-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-LEGALF16-NEXT: entry:
		; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]])
		; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0
		; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1
		; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP0]])
		; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP1]])
		; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0
		; IR-LEGALF16-NEXT: [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1
		; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
		; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
		; IR-LEGALF16-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]]
		; IR-LEGALF16-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-LEGALF16-NEXT: ret <2 x half> [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_v2f16:		; GFX6-LABEL: safe_math_fract_v2f16:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1		; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0		; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000		; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0		; GFX6-NEXT: s_mov_b32 s6, 0
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_v2f16:		; GFX8-LABEL: safe_math_fract_v2f16:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0		; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
		; GFX8-NEXT: s_movk_i32 s6, 0x204
; GFX8-NEXT: v_floor_f16_e32 v4, v3		; GFX8-NEXT: v_floor_f16_e32 v4, v3
; GFX8-NEXT: v_floor_f16_e32 v5, v0		; GFX8-NEXT: v_floor_f16_e32 v5, v0
; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4		; GFX8-NEXT: v_fract_f16_e32 v6, v3
; GFX8-NEXT: v_pk_add_f16 v5, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX8-NEXT: s_movk_i32 s4, 0x3bff
; GFX8-NEXT: v_pk_min_f16 v5, v5, s4 op_sel_hi:[1,0]
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_cmp_u_f16_e32 vcc, v3, v3
; GFX8-NEXT: s_movk_i32 s6, 0x204
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v3, vcc
; GFX8-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6		; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc		; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4
		; GFX8-NEXT: v_fract_f16_e32 v5, v0
; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5]		; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5]
; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6		; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5]		; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5]
; GFX8-NEXT: s_mov_b32 s4, 0x5040100		; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX8-NEXT: v_perm_b32 v0, v3, v0, s4
; GFX8-NEXT: global_store_dword v[1:2], v4, off		; GFX8-NEXT: global_store_dword v[1:2], v4, off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_v2f16:		; GFX11-LABEL: safe_math_fract_v2f16:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0		; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-NEXT: v_floor_f16_e32 v4, v0		; GFX11-NEXT: v_fract_f16_e32 v6, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) \| instskip(SKIP_2) \| instid1(VALU_DEP_3)		; GFX11-NEXT: v_floor_f16_e32 v5, v0
; GFX11-NEXT: v_floor_f16_e32 v5, v3		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) \| instskip(SKIP_2) \| instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v3		; GFX11-NEXT: v_fract_f16_e32 v4, v3
; GFX11-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204		; GFX11-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204
; GFX11-NEXT: v_pack_b32_f16 v4, v4, v5		; GFX11-NEXT: v_floor_f16_e32 v7, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) \| instskip(SKIP_2) \| instid1(VALU_DEP_1)		; GFX11-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0
; GFX11-NEXT: v_pk_add_f16 v5, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: v_pk_min_f16 v5, 0x3bff, v5 op_sel_hi:[0,1]
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) \| instskip(SKIP_2) \| instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v3, vcc_lo
; GFX11-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v3, v6, 0, s0
; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204		; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) \| instskip(NEXT) \| instid1(VALU_DEP_1)		; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) \| instskip(NEXT) \| instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e64 v0, v5, 0, s0		; GFX11-NEXT: v_pack_b32_f16 v4, v5, v7
; GFX11-NEXT: v_perm_b32 v0, v3, v0, 0x5040100		; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
		; GFX11-NEXT: global_store_b32 v[1:2], v4, off
		; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)		%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
%sub = fsub <2 x half> %x, %floor		%sub = fsub <2 x half> %x, %floor
%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)		%min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>)
%uno = fcmp uno <2 x half> %x, zeroinitializer		%uno = fcmp uno <2 x half> %x, zeroinitializer
%cond = select <2 x i1> %uno, <2 x half> %x, <2 x half> %min		%cond = select <2 x i1> %uno, <2 x half> %x, <2 x half> %min
%fabs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)		%fabs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%cmpinf = fcmp oeq <2 x half> %fabs, <half 0xH7C00, half 0xH7C00>		%cmpinf = fcmp oeq <2 x half> %fabs, <half 0xH7C00, half 0xH7C00>
%cond6 = select <2 x i1> %cmpinf, <2 x half> zeroinitializer, <2 x half> %cond		%cond6 = select <2 x i1> %cmpinf, <2 x half> zeroinitializer, <2 x half> %cond
store <2 x half> %floor, ptr addrspace(1) %ip, align 4		store <2 x half> %floor, ptr addrspace(1) %ip, align 4
ret <2 x half> %cond6		ret <2 x half> %cond6
}		}

define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %ip) {		define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %ip) {
; IR-LABEL: define <2 x double> @safe_math_fract_v2f64		; GFX6-IR-LABEL: define <2 x double> @safe_math_fract_v2f64
; IR-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {		; GFX6-IR-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:		; GFX6-IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])		; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]]		; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)		; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)
; IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer		; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer
; IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]]		; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]]
; IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])		; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000>		; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000>
; IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]		; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
; IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4		; GFX6-IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret <2 x double> [[COND6]]		; GFX6-IR-NEXT: ret <2 x double> [[COND6]]
		;
		; IR-FRACT-LABEL: define <2 x double> @safe_math_fract_v2f64
		; IR-FRACT-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[IP:%.*]]) #[[ATTR0]] {
		; IR-FRACT-NEXT: entry:
		; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]])
		; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[X]], i64 0
		; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X]], i64 1
		; IR-FRACT-NEXT: [[TMP2:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP0]])
		; IR-FRACT-NEXT: [[TMP3:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP1]])
		; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0
		; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1
		; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
		; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000>
		; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]]
		; IR-FRACT-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4
		; IR-FRACT-NEXT: ret <2 x double> [[COND6]]
;		;
; GFX6-LABEL: safe_math_fract_v2f64:		; GFX6-LABEL: safe_math_fract_v2f64:
; GFX6: ; %bb.0: ; %entry		; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[6:7], v[2:3]		; GFX6-NEXT: v_fract_f64_e32 v[6:7], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, -1		; GFX6-NEXT: v_mov_b32_e32 v10, -1
; GFX6-NEXT: v_mov_b32_e32 v11, 0x3fefffff		; GFX6-NEXT: v_mov_b32_e32 v11, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], v[10:11]		; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], v[10:11]
Show All 32 Lines
; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9]		; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9]
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64		; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)		; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]		; GFX6-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX7-LABEL: safe_math_fract_v2f64:		; GFX7-LABEL: safe_math_fract_v2f64:
; GFX7: ; %bb.0: ; %entry		; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
		; GFX7-NEXT: s_movk_i32 s4, 0x204
		; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
		; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4
		; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
		; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4
; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3]		; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]		; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX7-NEXT: s_mov_b32 s4, -1
; GFX7-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
; GFX7-NEXT: s_movk_i32 s6, 0x204
; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s6
; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s6
; GFX7-NEXT: v_add_f64 v[10:11], v[2:3], -v[8:9]
; GFX7-NEXT: v_add_f64 v[12:13], v[0:1], -v[6:7]
; GFX7-NEXT: s_mov_b32 s6, 0		; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000		; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: v_min_f64 v[10:11], v[10:11], s[4:5]
; GFX7-NEXT: v_min_f64 v[12:13], v[12:13], s[4:5]
; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[0:1]
; GFX7-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX7-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v1, s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[4:5]
; GFX7-NEXT: s_mov_b32 s4, s6		; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6		; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[8:9]		; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[8:9]
; GFX7-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[10:11]		; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[8:9]
; GFX7-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9]		; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[10:11]
; GFX7-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[10:11]		; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[10:11]
; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64		; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)		; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]		; GFX7-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX8-LABEL: safe_math_fract_v2f64:		; GFX8-LABEL: safe_math_fract_v2f64:
; GFX8: ; %bb.0: ; %entry		; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX8-NEXT: s_mov_b32 s4, -1
; GFX8-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
; GFX8-NEXT: s_movk_i32 s6, 0x204		; GFX8-NEXT: s_movk_i32 s6, 0x204
; GFX8-NEXT: v_add_f64 v[10:11], v[2:3], -v[8:9]		; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
; GFX8-NEXT: v_add_f64 v[12:13], v[0:1], -v[6:7]
; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
; GFX8-NEXT: v_min_f64 v[10:11], v[10:11], s[4:5]
; GFX8-NEXT: v_min_f64 v[12:13], v[12:13], s[4:5]
; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[0:1], v[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v1, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[4:5]
; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6		; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6
		; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6		; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6
; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[4:5]		; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[6:7]		; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[4:5]		; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[6:7]		; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5]
		; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[6:7]
		; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[6:7]
		; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
; GFX8-NEXT: s_waitcnt vmcnt(0)		; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]		; GFX8-NEXT: s_setpc_b64 s[30:31]
;		;
; GFX11-LABEL: safe_math_fract_v2f64:		; GFX11-LABEL: safe_math_fract_v2f64:
; GFX11: ; %bb.0: ; %entry		; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
		; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
		; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
		; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
		; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204
; GFX11-NEXT: v_floor_f64_e32 v[8:9], v[2:3]		; GFX11-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]		; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX11-NEXT: s_mov_b32 s0, -1		; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0
; GFX11-NEXT: s_mov_b32 s1, 0x3fefffff		; GFX11-NEXT: v_cndmask_b32_e64 v1, v11, 0, s0
; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[2:3], v[2:3]		; GFX11-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) \| instskip(NEXT) \| instid1(VALU_DEP_3)		; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1
; GFX11-NEXT: v_add_f64 v[10:11], v[2:3], -v[8:9]
; GFX11-NEXT: v_add_f64 v[12:13], v[0:1], -v[6:7]
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off		; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off
; GFX11-NEXT: v_min_f64 v[10:11], v[10:11], s[0:1]
; GFX11-NEXT: v_min_f64 v[12:13], v[12:13], s[0:1]
; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[0:1], v[0:1]
; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) \| instskip(NEXT) \| instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v3 :: v_dual_cndmask_b32 v10, v10, v2
; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v1, s0
; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v0, s0
; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) \| instskip(SKIP_1) \| instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, 0, s1
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, 0, s1
; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0, s0
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0		; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]		; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:		entry:
%floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)		%floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
%sub = fsub <2 x double> %x, %floor		%sub = fsub <2 x double> %x, %floor
%min = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %sub, <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)		%min = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %sub, <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>)
%uno = fcmp uno <2 x double> %x, zeroinitializer		%uno = fcmp uno <2 x double> %x, zeroinitializer
%cond = select <2 x i1> %uno, <2 x double> %x, <2 x double> %min		%cond = select <2 x i1> %uno, <2 x double> %x, <2 x double> %min
Show All 23 Lines
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0		declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
declare half @llvm.fabs.f16(half) #0		declare half @llvm.fabs.f16(half) #0
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0		declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0		declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }		attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:		;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}		; GCN: {{.*}}
; GFX6-IR: {{.*}}
; GFX7-IR: {{.*}}
; GFX8-IR: {{.*}}
; IR-FRACT: {{.*}}
; IR-LEGALF16: {{.*}}