Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1432,6 +1432,12 @@ return buildInstr(TargetOpcode::G_FLOG2, {Dst}, {Src}, Flags); } + /// Build and insert \p Dst = G_FEXP2 \p Src + MachineInstrBuilder buildFExp2(const DstOp &Dst, const SrcOp &Src, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags); + } + /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1 MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -76,6 +76,8 @@ MachineIRBuilder &B) const; bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const; + bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const; + Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -522,20 +522,22 @@ .clampScalar(1, S32, S64) .scalarize(0); - // FIXME: fexp, flog2, flog10 needs to be custom lowered. - auto &FExp2Ops = getActionDefinitionsBuilder({G_FPOW, G_FEXP, - G_FEXP2, G_FLOG2}); + // FIXME: fpow has a selection pattern that should move to custom lowering. 
+ auto &Exp2Ops = getActionDefinitionsBuilder({G_FEXP2, G_FLOG2, G_FPOW}); if (ST.has16BitInsts()) - FExp2Ops.legalFor({{S32}, {S16}}); + Exp2Ops.legalFor({S32, S16}); else - FExp2Ops.legalFor({S32}); - FExp2Ops.clampScalar(0, MinScalarFPTy, S32); - FExp2Ops.scalarize(0); + Exp2Ops.legalFor({S32}); + Exp2Ops.clampScalar(0, MinScalarFPTy, S32); + Exp2Ops.scalarize(0); - getActionDefinitionsBuilder({G_FLOG, G_FLOG10}) - .customFor({S32}) - .clampScalar(0, S32, S32) - .scalarize(0); + auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10}); + if (ST.has16BitInsts()) + ExpOps.customFor({{S32}, {S16}}); + else + ExpOps.customFor({S32}); + ExpOps.clampScalar(0, MinScalarFPTy, S32) + .scalarize(0); // The 64-bit versions produce 32-bit results, but only on the SALU. getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF, @@ -1179,6 +1181,8 @@ return legalizeFlog(MI, B, 1.0f / numbers::log2ef); case TargetOpcode::G_FLOG10: return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f); + case TargetOpcode::G_FEXP: + return legalizeFExp(MI, B); default: return false; } @@ -1828,6 +1832,22 @@ return true; } +bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, + MachineIRBuilder &B) const { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Flags = MI.getFlags(); + LLT Ty = B.getMRI()->getType(Dst); + B.setInstr(MI); + + auto K = B.buildFConstant(Ty, numbers::log2e); + auto Mul = B.buildFMul(Ty, Src, K, Flags); + B.buildFExp2(Dst, Mul, Flags); + + MI.eraseFromParent(); + return true; +} + // Return the use branch instruction, otherwise null if the usage is invalid. 
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -1,5 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX89 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX89 %s --- name: test_fexp_s32 @@ -7,28 +9,72 @@ bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_fexp_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[FEXP_:%[0-9]+]]:_(s32) = G_FEXP [[COPY]] - ; CHECK: $vgpr0 = COPY [[FEXP_]](s32) + ; GFX6-LABEL: name: test_fexp_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX89-LABEL: name: test_fexp_s32 + ; GFX89: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX89: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX89: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX89: $vgpr0 = COPY [[FEXP2_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FEXP %0 $vgpr0 = COPY %1 ... 
+--- +name: test_fexp_s32_nnan +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_fexp_s32_nnan + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX89-LABEL: name: test_fexp_s32_nnan + ; GFX89: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX89: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX89: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX89: $vgpr0 = COPY [[FEXP2_]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = nnan G_FEXP %0 + $vgpr0 = COPY %1 +... + --- name: test_fexp_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_fexp_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[FEXP_:%[0-9]+]]:_(s32) = G_FEXP [[UV]] - ; CHECK: [[FEXP_1:%[0-9]+]]:_(s32) = G_FEXP [[UV1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP_]](s32), [[FEXP_1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-LABEL: name: test_fexp_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX89-LABEL: name: 
test_fexp_v2s32 + ; GFX89: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX89: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX89: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX89: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX89: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX89: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX89: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX89: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FEXP %0 $vgpr0_vgpr1 = COPY %1 @@ -40,15 +86,95 @@ bb.0: liveins: $vgpr0_vgpr1_vgpr2 - ; CHECK-LABEL: name: test_fexp_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[FEXP_:%[0-9]+]]:_(s32) = G_FEXP [[UV]] - ; CHECK: [[FEXP_1:%[0-9]+]]:_(s32) = G_FEXP [[UV1]] - ; CHECK: [[FEXP_2:%[0-9]+]]:_(s32) = G_FEXP [[UV2]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP_]](s32), [[FEXP_1]](s32), [[FEXP_2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-LABEL: name: test_fexp_v3s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX6: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX6: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX89-LABEL: name: test_fexp_v3s32 + ; GFX89: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX89: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX89: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX89: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX89: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX89: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX89: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX89: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX89: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX89: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FEXP %0 $vgpr0_vgpr1_vgpr2 = COPY %1 ... 
+ +--- +name: test_fexp_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_fexp_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX89-LABEL: name: test_fexp_s16 + ; GFX89: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX89: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX89: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX89: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX89: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX89: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_FEXP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_fexp_v2s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_fexp_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX89-LABEL: name: test_fexp_v2s16 + ; GFX89: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX89: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX89: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] + ; GFX89: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX89: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] + ; GFX89: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] + ; GFX89: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FEXP2_]](s16), [[FEXP2_1]](s16) + ; GFX89: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FEXP %1 + $vgpr0 = COPY %1 +...