Index: llvm/lib/Analysis/ConstantFolding.cpp =================================================================== --- llvm/lib/Analysis/ConstantFolding.cpp +++ llvm/lib/Analysis/ConstantFolding.cpp @@ -1457,6 +1457,7 @@ case Intrinsic::amdgcn_cubetc: case Intrinsic::amdgcn_fmul_legacy: case Intrinsic::amdgcn_fract: + case Intrinsic::amdgcn_ldexp: case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse_cvttss2si: @@ -2224,6 +2225,16 @@ return ConstantFP::get(Ty->getContext(), APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); + + if (IntrinsicID == Intrinsic::amdgcn_ldexp) { + // FIXME: Should flush denorms depending on FP mode, but that's ignored + // everywhere else. + + // scalbn is equivalent to ldexp with float radix 2 + APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), Result); + } } return nullptr; } Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4067,6 +4067,53 @@ break; } + case Intrinsic::amdgcn_ldexp: { + // FIXME: This doesn't introduce new instructions and belongs in + // InstructionSimplify. + Type *Ty = II->getType(); + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + + // Folding undef to qnan is safe regardless of the FP mode. + if (isa<UndefValue>(Op0)) { + auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); + return replaceInstUsesWith(*II, QNaN); + } + + const APFloat *C = nullptr; + match(Op0, m_APFloat(C)); + + // FIXME: Should flush denorms depending on FP mode, but that's ignored + // everywhere else. + // + // These cases should be safe, even with strictfp. 
+ // ldexp(0.0, x) -> 0.0 + // ldexp(-0.0, x) -> -0.0 + // ldexp(inf, x) -> inf + // ldexp(-inf, x) -> -inf + if (C && (C->isZero() || C->isInfinity())) + return replaceInstUsesWith(*II, Op0); + + // With strictfp, be more careful about possibly needing to flush denormals + // or not, and snan behavior depends on ieee_mode. + if (II->isStrictFP()) + break; + + if (C && C->isNaN()) { + // FIXME: We just need to make the nan quiet here, but that's unavailable + // on APFloat, only IEEEfloat + auto *Quieted = ConstantFP::get( + Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven)); + return replaceInstUsesWith(*II, Quieted); + } + + // ldexp(x, 0) -> x + // ldexp(x, undef) -> x + if (isa<UndefValue>(Op1) || match(Op1, m_ZeroInt())) + return replaceInstUsesWith(*II, Op0); + + break; + } case Intrinsic::hexagon_V6_vandvrt: case Intrinsic::hexagon_V6_vandvrt_128B: { // Simplify Q -> V -> Q conversion. Index: llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define float @ldexp_f32_undef_undef() { +; CHECK-LABEL: @ldexp_f32_undef_undef( +; CHECK-NEXT: ret float 0x7FF8000000000000 +; + %call = call float @llvm.amdgcn.ldexp.f32(float undef, i32 undef) + ret float %call +} + +; If the exponent is 0, it doesn't matter if the first argument is +; constant or not. 
+define void @ldexp_f32_exp0(float %x) { +; CHECK-LABEL: @ldexp_f32_exp0( +; CHECK-NEXT: store volatile float [[X:%.*]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float [[X]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[ONE:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X]], i32 1) +; CHECK-NEXT: store volatile float [[ONE]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %zero = call float @llvm.amdgcn.ldexp.f32(float %x, i32 0) + store volatile float %zero, float addrspace(1)* undef + + %undef = call float @llvm.amdgcn.ldexp.f32(float %x, i32 undef) + store volatile float %undef, float addrspace(1)* undef + + %one = call float @llvm.amdgcn.ldexp.f32(float %x, i32 1) + store volatile float %one, float addrspace(1)* undef + ret void +} + +; Test variable exponent but zero or undef value. +define void @ldexp_f32_val0(i32 %y) { +; CHECK-LABEL: @ldexp_f32_val0( +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x7FF8000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 %y) + store volatile float %zero, float addrspace(1)* undef + + %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 %y) + store volatile float %neg.zero, float addrspace(1)* undef + + %undef = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %y) + store volatile float %undef, float addrspace(1)* undef + ret void +} + +define void @ldexp_f32_val_infinity(i32 %y) { +; CHECK-LABEL: @ldexp_f32_val_infinity( +; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xFFF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 
0xFFF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %inf = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0000000000000, i32 %y) + store volatile float %inf, float addrspace(1)* undef + + %neg.inf = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000000000000, i32 %y) + store volatile float %neg.inf, float addrspace(1)* undef + + %inf.zero = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0000000000000, i32 0) + store volatile float %inf.zero, float addrspace(1)* undef + + %neg.inf.zero = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000000000000, i32 0) + store volatile float %neg.inf.zero, float addrspace(1)* undef + + ret void +} + +; Signaling nan should be quieted. +; Technically this depends on the ieee_mode in the mode register. +define void @ldexp_f32_val_nan(i32 %y) { +; CHECK-LABEL: @ldexp_f32_val_nan( +; CHECK-NEXT: store volatile float 0x7FF8001000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xFFF8000100000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x7FF8000020000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xFFFFFFFFE0000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %plus.qnan = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0001000000000, i32 %y) + store volatile float %plus.qnan, float addrspace(1)* undef + + %neg.qnan = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000100000000, i32 %y) + store volatile float %neg.qnan, float addrspace(1)* undef + + %plus.snan = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 %y) + store volatile float %plus.snan, float addrspace(1)* undef + + %neg.snan = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 %y) + store volatile float %neg.snan, float addrspace(1)* undef + + ret void +} + +define void @ldexp_f32_val_nan_strictfp(i32 %y) #0 { +; CHECK-LABEL: @ldexp_f32_val_nan_strictfp( +; CHECK-NEXT: [[PLUS_QNAN:%.*]] = call float 
@llvm.amdgcn.ldexp.f32(float 0x7FF0001000000000, i32 [[Y:%.*]]) #0 +; CHECK-NEXT: store volatile float [[PLUS_QNAN]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[NEG_QNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0xFFF0000100000000, i32 [[Y]]) #0 +; CHECK-NEXT: store volatile float [[NEG_QNAN]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[PLUS_SNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 [[Y]]) #0 +; CHECK-NEXT: store volatile float [[PLUS_SNAN]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[NEG_SNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 [[Y]]) #0 +; CHECK-NEXT: store volatile float [[NEG_SNAN]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x7FF8000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %plus.qnan = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0001000000000, i32 %y) #0 + store volatile float %plus.qnan, float addrspace(1)* undef + + %neg.qnan = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000100000000, i32 %y) #0 + store volatile float %neg.qnan, float addrspace(1)* undef + + %plus.snan = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 %y) #0 + store volatile float %plus.snan, float addrspace(1)* undef + + %neg.snan = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 %y) #0 + store volatile float %neg.snan, float addrspace(1)* undef + + %undef = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %y) #0 + store volatile float %undef, float addrspace(1)* undef + + ret void +} + +define void @ldexp_f32_0() { +; CHECK-LABEL: @ldexp_f32_0( +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; 
CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 0) + store volatile float %zero, float addrspace(1)* undef + + %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 0) + store volatile float %neg.zero, float addrspace(1)* undef + + %one = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 1) + store volatile float %one, float addrspace(1)* undef + + %min.exp = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 -126) + store volatile float %min.exp, float addrspace(1)* undef + + %min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 -127) + store volatile float %min.exp.sub1, float addrspace(1)* undef + + %max.exp = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 127) + store volatile float %max.exp, float addrspace(1)* undef + + %max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 128) + store volatile float %max.exp.plus1, float addrspace(1)* undef + + ret void +} + +; Should be able to ignore strictfp in this case +define void @ldexp_f32_0_strictfp(float %x) #0 { +; CHECK-LABEL: @ldexp_f32_0_strictfp( +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[UNKNOWN_ZERO:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X:%.*]], i32 0) #0 +; CHECK-NEXT: store volatile float [[UNKNOWN_ZERO]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[UNKNOWN_UNDEF:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X]], i32 undef) #0 +; CHECK-NEXT: store volatile float [[UNKNOWN_UNDEF]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: 
[[DENORMAL_0:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0) #0 +; CHECK-NEXT: store volatile float [[DENORMAL_0]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: [[DENORMAL_1:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1) #0 +; CHECK-NEXT: store volatile float [[DENORMAL_1]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 0) #0 + store volatile float %zero, float addrspace(1)* undef + + %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 0) #0 + store volatile float %neg.zero, float addrspace(1)* undef + + %one = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 1) #0 + store volatile float %one, float addrspace(1)* undef + + %unknown.zero = call float @llvm.amdgcn.ldexp.f32(float %x, i32 0) #0 + store volatile float %unknown.zero, float addrspace(1)* undef + + %unknown.undef = call float @llvm.amdgcn.ldexp.f32(float %x, i32 undef) #0 + store volatile float %unknown.undef, float addrspace(1)* undef + + %denormal.0 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0) #0 + store volatile float %denormal.0, float addrspace(1)* undef + + %denormal.1 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1) #0 + store volatile float %denormal.1, float addrspace(1)* undef + + ret void +} + +define void @ldexp_f32() { +; CHECK-LABEL: @ldexp_f32( +; CHECK-NEXT: store volatile float 2.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 4.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 8.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 5.000000e-01, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x3810000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x3800000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 
0x47E0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -2.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -4.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -8.000000e+00, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float -5.000000e-01, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xB810000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xB800000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xC7E0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0xFFF0000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x44D5000000000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %one.one = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 1) + store volatile float %one.one, float addrspace(1)* undef + + %one.two = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 2) + store volatile float %one.two, float addrspace(1)* undef + + %one.three = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 3) + store volatile float %one.three, float addrspace(1)* undef + + %one.negone = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -1) + store volatile float %one.negone, float addrspace(1)* undef + + %one.min.exp = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -126) + store volatile float %one.min.exp, float addrspace(1)* undef + + %one.min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -127) + store volatile float %one.min.exp.sub1, float addrspace(1)* undef + + %one.max.exp = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 127) + store volatile float %one.max.exp, float addrspace(1)* undef + + %one.max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 128) + store 
volatile float %one.max.exp.plus1, float addrspace(1)* undef + + %neg.one.one = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 1) + store volatile float %neg.one.one, float addrspace(1)* undef + + %neg.one.two = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 2) + store volatile float %neg.one.two, float addrspace(1)* undef + + %neg.one.three = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 3) + store volatile float %neg.one.three, float addrspace(1)* undef + + %neg.one.negone = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -1) + store volatile float %neg.one.negone, float addrspace(1)* undef + + %neg.one.min.exp = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -126) + store volatile float %neg.one.min.exp, float addrspace(1)* undef + + %neg.one.min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -127) + store volatile float %neg.one.min.exp.sub1, float addrspace(1)* undef + + %neg.one.max.exp = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 127) + store volatile float %neg.one.max.exp, float addrspace(1)* undef + + %neg.one.max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 128) + store volatile float %neg.one.max.exp.plus1, float addrspace(1)* undef + + %fortytwo.seven = call float @llvm.amdgcn.ldexp.f32(float 42.0, i32 73) + store volatile float %fortytwo.seven, float addrspace(1)* undef + + ret void +} + +; Technically we should probably flush these depending on the expected +; denormal mode of the function, but no other IR constant folding +; considers this. 
+define void @ldexp_f32_denormal() { +; CHECK-LABEL: @ldexp_f32_denormal( +; CHECK-NEXT: store volatile float 0x380FFFFFC0000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float 0x381FFFFFC0000000, float addrspace(1)* undef, align 4 +; CHECK-NEXT: ret void +; + %denormal.0 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0) + store volatile float %denormal.0, float addrspace(1)* undef + + %denormal.1 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1) + store volatile float %denormal.1, float addrspace(1)* undef + + ret void +} + +define void @ldexp_f64() { +; CHECK-LABEL: @ldexp_f64( +; CHECK-NEXT: store volatile double 2.000000e+00, double addrspace(1)* undef, align 8 +; CHECK-NEXT: store volatile double 4.000000e+00, double addrspace(1)* undef, align 8 +; CHECK-NEXT: store volatile double 0x44D5000000000000, double addrspace(1)* undef, align 8 +; CHECK-NEXT: ret void +; + %one.one = call double @llvm.amdgcn.ldexp.f64(double 1.0, i32 1) + store volatile double %one.one, double addrspace(1)* undef + + %one.two = call double @llvm.amdgcn.ldexp.f64(double 1.0, i32 2) + store volatile double %one.two, double addrspace(1)* undef + + %fortytwo.seven = call double @llvm.amdgcn.ldexp.f64(double 42.0, i32 73) + store volatile double %fortytwo.seven, double addrspace(1)* undef + + ret void +} + +define void @ldexp_f16() { +; CHECK-LABEL: @ldexp_f16( +; CHECK-NEXT: store volatile half 0xH4000, half addrspace(1)* undef, align 2 +; CHECK-NEXT: store volatile half 0xH4400, half addrspace(1)* undef, align 2 +; CHECK-NEXT: store volatile half 0xH7C00, half addrspace(1)* undef, align 2 +; CHECK-NEXT: ret void +; + %one.one = call half @llvm.amdgcn.ldexp.f16(half 1.0, i32 1) + store volatile half %one.one, half addrspace(1)* undef + + %one.two = call half @llvm.amdgcn.ldexp.f16(half 1.0, i32 2) + store volatile half %one.two, half addrspace(1)* undef + + %fortytwo.seven = call half @llvm.amdgcn.ldexp.f16(half 42.0, i32 73) + 
store volatile half %fortytwo.seven, half addrspace(1)* undef + + ret void +} + +declare half @llvm.amdgcn.ldexp.f16(half, i32) #1 +declare float @llvm.amdgcn.ldexp.f32(float, i32) #1 +declare double @llvm.amdgcn.ldexp.f64(double, i32) #1 + +attributes #0 = { strictfp } +attributes #1 = { nounwind readnone speculatable }