Index: llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -236,6 +236,9 @@
   bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                 MachineIRBuilder &MIRBuilder);
 
+  bool translateConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI,
+                                       MachineIRBuilder &MIRBuilder);
+
   bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                MachineIRBuilder &MIRBuilder);
 
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -640,6 +640,15 @@
 /// Generic dynamic stack allocation.
 HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC)
 
+/// Strict floating point instructions.
+HANDLE_TARGET_OPCODE(G_STRICT_FADD)
+HANDLE_TARGET_OPCODE(G_STRICT_FSUB)
+HANDLE_TARGET_OPCODE(G_STRICT_FMUL)
+HANDLE_TARGET_OPCODE(G_STRICT_FDIV)
+HANDLE_TARGET_OPCODE(G_STRICT_FREM)
+HANDLE_TARGET_OPCODE(G_STRICT_FMA)
+HANDLE_TARGET_OPCODE(G_STRICT_FSQRT)
+
 /// read_register intrinsic
 HANDLE_TARGET_OPCODE(G_READ_REGISTER)
Index: llvm/include/llvm/Target/GenericOpcodes.td
===================================================================
--- llvm/include/llvm/Target/GenericOpcodes.td
+++ llvm/include/llvm/Target/GenericOpcodes.td
@@ -19,6 +19,22 @@
   let isPreISelOpcode = 1;
 }
 
+// Provide a variant of an instruction with the same operands, but
+// different instruction flags. This is intended to provide a
+// convenient way to define strict floating point variants of ordinary
+// floating point instructions.
+class ConstrainedInstruction<Instruction baseInst> :
+  GenericInstruction {
+  let OutOperandList = baseInst.OutOperandList;
+  let InOperandList = baseInst.InOperandList;
+  let isCommutable = baseInst.isCommutable;
+
+  // TODO: Do we need a better way to mark reads from FP mode than
+  // hasSideEffects?
+  let hasSideEffects = 1;
+  let mayRaiseFPException = 1;
+}
+
 // Extend the underlying scalar type of an operation, leaving the high bits
 // unspecified.
 def G_ANYEXT : GenericInstruction {
@@ -1128,4 +1144,14 @@
   let hasSideEffects = 0;
 }
 
-// TODO: Add the other generic opcodes.
+//------------------------------------------------------------------------------
+// Constrained floating point ops
+//------------------------------------------------------------------------------
+
+def G_STRICT_FADD : ConstrainedInstruction<G_FADD>;
+def G_STRICT_FSUB : ConstrainedInstruction<G_FSUB>;
+def G_STRICT_FMUL : ConstrainedInstruction<G_FMUL>;
+def G_STRICT_FDIV : ConstrainedInstruction<G_FDIV>;
+def G_STRICT_FREM : ConstrainedInstruction<G_FREM>;
+def G_STRICT_FMA : ConstrainedInstruction<G_FMA>;
+def G_STRICT_FSQRT : ConstrainedInstruction<G_FSQRT>;
Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -124,6 +124,14 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
 
+def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
+def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
+def : GINodeEquiv<G_STRICT_FMUL, strict_fmul>;
+def : GINodeEquiv<G_STRICT_FDIV, strict_fdiv>;
+def : GINodeEquiv<G_STRICT_FREM, strict_frem>;
+def : GINodeEquiv<G_STRICT_FMA, strict_fma>;
+def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>;
+
 // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
 // complications that tablegen must take care of. For example, Predicates such
 // as isSignExtLoad require that this is not a perfect 1:1 mapping since a
Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1307,6 +1307,51 @@
   return true;
 }
 
+// TODO: Include ConstrainedOps.def when all strict instructions are defined.
+static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
+  switch (ID) {
+  case Intrinsic::experimental_constrained_fadd:
+    return TargetOpcode::G_STRICT_FADD;
+  case Intrinsic::experimental_constrained_fsub:
+    return TargetOpcode::G_STRICT_FSUB;
+  case Intrinsic::experimental_constrained_fmul:
+    return TargetOpcode::G_STRICT_FMUL;
+  case Intrinsic::experimental_constrained_fdiv:
+    return TargetOpcode::G_STRICT_FDIV;
+  case Intrinsic::experimental_constrained_frem:
+    return TargetOpcode::G_STRICT_FREM;
+  case Intrinsic::experimental_constrained_fma:
+    return TargetOpcode::G_STRICT_FMA;
+  case Intrinsic::experimental_constrained_sqrt:
+    return TargetOpcode::G_STRICT_FSQRT;
+  default:
+    return 0;
+  }
+}
+
+bool IRTranslator::translateConstrainedFPIntrinsic(
+  const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+  fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+
+  unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
+  if (!Opcode)
+    return false;
+
+  unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+  if (EB == fp::ExceptionBehavior::ebIgnore)
+    Flags |= MachineInstr::NoFPExcept;
+
+  SmallVector<llvm::SrcOp, 4> VRegs;
+  VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
+  if (!FPI.isUnaryOp())
+    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
+  if (FPI.isTernaryOp())
+    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
+
+  MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
+  return true;
+}
+
 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
@@ -1572,6 +1617,12 @@
         .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
     return true;
   }
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
+  case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
+    return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
+                                           MIRBuilder);
+
   }
   return false;
 }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
+
+define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY [[STRICT_FADD]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret float %val
+}
+
+define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY [[STRICT_FADD]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret float %val
+}
+
+define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY [[STRICT_FADD]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  ret float %val
+}
+
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>)
+  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret <2 x float> %val
+}
+
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>)
+  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret <2 x float> %val
+}
+
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>)
+  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  ret <2 x float> %val
+}
+
+define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+  ; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]]
+  ; CHECK:   $vgpr0 = COPY %3(s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 {
+  ; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]]
+  ; CHECK:   $vgpr0 = COPY %4(s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore")
+  ret float %val
+}
+
+define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 {
+  ; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]]
+  ; CHECK:   $vgpr0 = COPY [[STRICT_FSQRT]](s32)
+  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK:   S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret float %val
+}
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
+declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1
+declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) #1
+
+attributes #0 = { strictfp }
+attributes #1 = { inaccessiblememonly nounwind willreturn }