Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -38,6 +38,9 @@
 
   bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &MIRBuilder) const;
+  bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     MachineIRBuilder &MIRBuilder) const;
+
 };
 } // End llvm namespace.
 #endif
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -293,6 +293,18 @@
     .legalFor({S32, S64})
     .scalarize(0);
 
+  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+    getActionDefinitionsBuilder(G_FRINT)
+      .legalFor({S32, S64})
+      .clampScalar(0, S32, S64)
+      .scalarize(0);
+  } else {
+    getActionDefinitionsBuilder(G_FRINT)
+      .legalFor({S32})
+      .customFor({S64})
+      .clampScalar(0, S32, S64)
+      .scalarize(0);
+  }
 
   getActionDefinitionsBuilder(G_GEP)
     .legalForCartesianProduct(AddrSpaces64, {S64})
@@ -675,6 +687,8 @@
   switch (MI.getOpcode()) {
   case TargetOpcode::G_ADDRSPACE_CAST:
     return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_FRINT:
+    return legalizeFrint(MI, MRI, MIRBuilder);
   default:
     return false;
   }
@@ -831,3 +845,40 @@
   MI.eraseFromParent();
   return true;
 }
+
+/// Custom-lower a 64-bit scalar G_FRINT on subtargets whose legalizer rules
+/// mark s64 G_FRINT as custom, replacing \p MI with generic FP operations.
+///
+/// Uses the 2^52 trick: adding and then subtracting copysign(2^52, Src)
+/// leaves a value with no fractional bits. Inputs whose magnitude exceeds
+/// 0x1.fffffffffffffp+51 are already integral and are passed through as-is.
+bool AMDGPULegalizerInfo::legalizeFrint(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MIRBuilder.setInstr(MI);
+
+  unsigned Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Src);
+  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
+
+  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
+  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
+
+  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
+  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
+
+  // TODO: Should this propagate fast-math-flags?
+  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
+  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
+
+  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
+  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
+
+  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
+  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
+
+  // The select above redefines MI's result register, so the original G_FRINT
+  // must be removed; leaving it behind gives that register two definitions.
+  MI.eraseFromParent();
+  return true;
+}
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir
@@ -0,0 +1,157 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -o - %s | FileCheck -check-prefix=CI %s
+
+---
+name: test_frint_s16
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_frint_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
+    ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-LABEL: name: test_frint_s16
+    ; CI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; CI: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
+    ; CI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s16) = G_FRINT %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_frint_s32
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_frint_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: $vgpr0 = COPY [[COPY]](s32)
+    ; CI-LABEL: name: test_frint_s32
+    ; CI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CI: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FRINT %0
+    $vgpr0 = COPY %0
+...
+
+---
+name: test_frint_s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_frint_s64
+    ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x4330000000000000
+    ; SI: [[FCOPYSIGN:%[0-9]+]]:_(s64) = G_FCOPYSIGN [[C]], [[COPY]](s64)
+    ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FCOPYSIGN]]
+    ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FCOPYSIGN]]
+    ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]]
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF
+    ; SI: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]]
+    ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C1]]
+    ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FADD1]]
+    ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+    ; CI-LABEL: name: test_frint_s64
+    ; CI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CI: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[COPY]]
+    ; CI: $vgpr0_vgpr1 = COPY [[FRINT]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_FRINT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_frint_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_frint_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: $vgpr0 = COPY [[COPY]](<2 x s16>)
+    ; CI-LABEL: name: test_frint_v2s16
+    ; CI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CI: $vgpr0 = COPY [[COPY]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = G_FRINT %0
+    $vgpr0 = COPY %0
+...
+
+---
+name: test_frint_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_frint_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]]
+    ; SI: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; CI-LABEL: name: test_frint_v2s32
+    ; CI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CI: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]]
+    ; CI: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = G_FRINT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_frint_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_frint_v2s64
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x4330000000000000
+    ; SI: [[FCOPYSIGN:%[0-9]+]]:_(s64) = G_FCOPYSIGN [[C]], [[UV]](s64)
+    ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FCOPYSIGN]]
+    ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FCOPYSIGN]]
+    ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]]
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF
+    ; SI: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]]
+    ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C1]]
+    ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FADD1]]
+    ; SI: [[FCOPYSIGN1:%[0-9]+]]:_(s64) = G_FCOPYSIGN [[C]], [[UV1]](s64)
+    ; SI: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FCOPYSIGN1]]
+    ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[FCOPYSIGN1]]
+    ; SI: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[FADD2]], [[FNEG1]]
+    ; SI: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]]
+    ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS1]](s64), [[C1]]
+    ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FADD3]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-LABEL: name: test_frint_v2s64
+    ; CI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CI: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[UV]]
+    ; CI: [[FRINT1:%[0-9]+]]:_(s64) = G_FRINT [[UV1]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FRINT]](s64), [[FRINT1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s64>) = G_FRINT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...