Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1425,6 +1425,20 @@
     return buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {Dst}, {Src0}, Flags);
   }
 
+  /// Build and insert \p Dst = G_FLOG \p Src
+  /// \p Flags (if set) is forwarded to buildInstr for the new instruction.
+  MachineInstrBuilder buildFLog(const DstOp &Dst, const SrcOp &Src,
+                                Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_FLOG, {Dst}, {Src}, Flags);
+  }
+
+  /// Build and insert \p Dst = G_FLOG2 \p Src
+  /// \p Flags (if set) is forwarded to buildInstr for the new instruction.
+  MachineInstrBuilder buildFLog2(const DstOp &Dst, const SrcOp &Src,
+                                 Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_FLOG2, {Dst}, {Src}, Flags);
+  }
+
   /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
   MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
                                      const SrcOp &Src1) {
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -74,7 +74,8 @@
 
   bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B) const;
-
+  bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
+                    double Log2BaseInverted) const;
   Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg,
                              LLT Ty) const;
 
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -519,10 +519,15 @@
 
   // FIXME: fexp, flog2, flog10 needs to be custom lowered.
getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2, - G_FLOG, G_FLOG2, G_FLOG10}) + G_FLOG2}) .legalFor({S32}) .scalarize(0); + getActionDefinitionsBuilder({G_FLOG,G_FLOG10}) + .customFor({S32}) + .clampScalar(0, S32, S32) + .scalarize(0); + // The 64-bit versions produce 32-bit results, but only on the SALU. getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF, G_CTTZ, G_CTTZ_ZERO_UNDEF, @@ -1247,6 +1252,10 @@ return legalizeFDIV(MI, MRI, B); case TargetOpcode::G_ATOMIC_CMPXCHG: return legalizeAtomicCmpXChg(MI, MRI, B); + case TargetOpcode::G_FLOG: + return legalizeFlog(MI, B, 1.0f / numbers::log2ef); + case TargetOpcode::G_FLOG10: + return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f); default: return false; } @@ -1880,6 +1889,21 @@ return true; } +bool AMDGPULegalizerInfo::legalizeFlog( + MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = B.getMRI()->getType(Dst); + unsigned Flags = MI.getFlags(); + B.setInstr(MI); + + auto Log2Operand = B.buildFLog2(Ty, Src, Flags); + auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); + + B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags); + return true; +} + // Return the use branch instruction, otherwise null if the usage is invalid. 
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog.mir @@ -9,6 +9,9 @@ ; CHECK-LABEL: name: test_flog_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = G_FLOG [[COPY]] ; CHECK: $vgpr0 = COPY [[FLOG]](s32) %0:_(s32) = COPY $vgpr0 @@ -16,6 +19,24 @@ $vgpr0 = COPY %1 ... +--- +name: test_flog_s32_flags +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog_s32_flags + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FLOG2_]], [[C]] + ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = nnan G_FLOG [[COPY]] + ; CHECK: $vgpr0 = COPY [[FLOG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = nnan G_FLOG %0 + $vgpr0 = COPY %1 +... 
+ --- name: test_flog_v2s32 body: | @@ -25,7 +46,12 @@ ; CHECK-LABEL: name: test_flog_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = G_FLOG [[UV]] + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]] ; CHECK: [[FLOG1:%[0-9]+]]:_(s32) = G_FLOG [[UV1]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FLOG]](s32), [[FLOG1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -43,8 +69,15 @@ ; CHECK-LABEL: name: test_flog_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = G_FLOG [[UV]] + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]] ; CHECK: [[FLOG1:%[0-9]+]]:_(s32) = G_FLOG [[UV1]] + ; CHECK: [[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_2]], [[C]] ; CHECK: [[FLOG2:%[0-9]+]]:_(s32) = G_FLOG [[UV2]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FLOG]](s32), [[FLOG1]](s32), [[FLOG2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) @@ -52,3 +85,60 @@ %1:_(<3 x s32>) = G_FLOG %0 $vgpr0_vgpr1_vgpr2 = COPY %1 ... 
+ +--- +name: test_flog_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] + ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = G_FLOG [[FPEXT]] + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLOG]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_FLOG %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 + +... + +--- +name: test_flog_v2s16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FE62E4300000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C1]] + ; CHECK: [[FLOG:%[0-9]+]]:_(s32) = G_FLOG [[FPEXT]] + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLOG]](s32) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C1]] + ; CHECK: [[FLOG1:%[0-9]+]]:_(s32) = G_FLOG [[FPEXT1]] + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLOG1]](s32) + ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FLOG %0 + $vgpr0 = COPY %1 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-flog10.mir @@ -9,6 +9,9 @@ ; CHECK-LABEL: name: test_flog10_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = G_FLOG10 [[COPY]] ; CHECK: $vgpr0 = COPY [[FLOG10_]](s32) %0:_(s32) = COPY $vgpr0 @@ -16,6 +19,24 @@ $vgpr0 = COPY %1 ... +--- +name: test_flog10_s32_flags +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog10_s32_flags + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = nnan G_FLOG2 [[COPY]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FLOG2_]], [[C]] + ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = nnan G_FLOG10 [[COPY]] + ; CHECK: $vgpr0 = COPY [[FLOG10_]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = nnan G_FLOG10 %0 + $vgpr0 = COPY %1 +... 
+ --- name: test_flog10_v2s32 body: | @@ -25,7 +46,12 @@ ; CHECK-LABEL: name: test_flog10_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = G_FLOG10 [[UV]] + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]] ; CHECK: [[FLOG10_1:%[0-9]+]]:_(s32) = G_FLOG10 [[UV1]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FLOG10_]](s32), [[FLOG10_1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -43,8 +69,15 @@ ; CHECK-LABEL: name: test_flog10_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = G_FLOG10 [[UV]] + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C]] ; CHECK: [[FLOG10_1:%[0-9]+]]:_(s32) = G_FLOG10 [[UV1]] + ; CHECK: [[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_2]], [[C]] ; CHECK: [[FLOG10_2:%[0-9]+]]:_(s32) = G_FLOG10 [[UV2]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FLOG10_]](s32), [[FLOG10_1]](s32), [[FLOG10_2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) @@ -52,3 +85,60 @@ %1:_(<3 x s32>) = G_FLOG10 %0 $vgpr0_vgpr1_vgpr2 = COPY %1 ... 
+ +--- +name: test_flog10_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog10_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C]] + ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = G_FLOG10 [[FPEXT]] + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLOG10_]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_FLOG10 %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 + +... + +--- +name: test_flog10_v2s16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_flog10_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CHECK: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FD3441340000000 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_]], [[C1]] + ; CHECK: [[FLOG10_:%[0-9]+]]:_(s32) = G_FLOG10 [[FPEXT]] + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FLOG10_]](s32) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; CHECK: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT1]] + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FLOG2_1]], [[C1]] + ; CHECK: [[FLOG10_1:%[0-9]+]]:_(s32) = G_FLOG10 [[FPEXT1]] + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FLOG10_1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FLOG10 %0 + $vgpr0 = COPY %1 + +...