Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -996,6 +996,92 @@
 
     break;
   }
+  case Intrinsic::amdgcn_trig_preop: {
+    // The intrinsic is declared with name mangling, but currently the
+    // instruction only exists for f64
+    if (!II.getType()->isDoubleTy())
+      break;
+
+    Value *Src = II.getArgOperand(0);
+    Value *Segment = II.getArgOperand(1);
+    // Test poison before undef: a poison operand folds to poison, an undef
+    // operand folds to a quiet NaN.
+    if (isa<PoisonValue>(Src))
+      return IC.replaceInstUsesWith(II, Src);
+
+    if (isa<UndefValue>(Src)) {
+      auto *QNaN = ConstantFP::get(
+          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
+      return IC.replaceInstUsesWith(II, QNaN);
+    }
+
+    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
+    if (!Csrc)
+      break;
+
+    if (II.isStrictFP())
+      break;
+
+    const APFloat &Fsrc = Csrc->getValueAPF();
+    if (Fsrc.isNaN()) {
+      // FIXME: We just need to make the nan quiet here, but that's unavailable
+      // on APFloat, only IEEEfloat
+      auto *Quieted = ConstantFP::get(
+          II.getType(), scalbn(Fsrc, 0, APFloat::rmNearestTiesToEven));
+      return IC.replaceInstUsesWith(II, Quieted);
+    }
+
+    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
+    if (!Cseg)
+      break;
+
+    // Leading bits of 2.0/PI, most significant word first.
+    static const uint32_t TwoByPi[] = {
+        0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
+        0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
+        0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
+        0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
+        0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
+        0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
+        0x56033046};
+
+    // Only the low 5 bits of the segment select the window, so Shift is at
+    // most 31 * 53 + (0x7ff - 1077) and plain unsigned math cannot overflow.
+    const unsigned Exponent =
+        (Fsrc.bitcastToAPInt().getZExtValue() >> 52) & 0x7ff;
+    const unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
+    unsigned Shift = SegmentVal * 53;
+    if (Exponent > 1077)
+      Shift += Exponent - 1077;
+
+    // Three consecutive table words are read below. Fold to +0.0 when that
+    // window would run past the end of the table instead of reading out of
+    // bounds.
+    const unsigned Idx = Shift >> 5;
+    if (Idx + 2 >= std::size(TwoByPi)) {
+      APFloat Zero = APFloat::getZero(II.getType()->getFltSemantics());
+      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getType(), Zero));
+    }
+
+    const unsigned BShift = Shift & 0x1f;
+    uint64_t Thi = (static_cast<uint64_t>(TwoByPi[Idx]) << 32) |
+                   static_cast<uint64_t>(TwoByPi[Idx + 1]);
+    const uint64_t Tlo = static_cast<uint64_t>(TwoByPi[Idx + 2]) << 32;
+    // Skip the merge for BShift == 0: a 64-bit shift by 64 is undefined.
+    if (BShift)
+      Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
+
+    // Keep the top 53 bits; they convert to double without rounding.
+    Thi >>= 11;
+    APFloat Res = APFloat(static_cast<double>(Thi));
+
+    int Scale = -53 - static_cast<int>(Shift);
+    if (Exponent >= 1968)
+      Scale += 128;
+
+    Res = scalbn(Res, Scale, RoundingMode::NearestTiesToEven);
+    return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Res));
+  }
   case Intrinsic::amdgcn_fmul_legacy: {
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);
Index: llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -5478,8 +5478,7 @@
 
 define double @trig_preop_constfold_variable_undef_arg(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 [[ARG:%.*]])
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x7FF8000000000000
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg)
   ret double %val
@@ -5487,8 +5486,7 @@
 
 define double @trig_preop_constfold_variable_poison_arg(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_constfold_variable_poison_arg(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 [[ARG:%.*]])
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double poison
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 %arg)
   ret double %val
@@ -5523,8 +5521,7 @@
 
 define double @trig_preop_qnan(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_qnan(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 [[ARG:%.*]])
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x7FF8000000000000
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg)
   ret double %val
@@ -5532,8 +5529,7 @@
 
 define double @trig_preop_snan(i32 %arg) {
 ; CHECK-LABEL: @trig_preop_snan(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 [[ARG:%.*]])
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x7FF8000000000001
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg)
   ret double %val
@@ -5541,8 +5537,7 @@
 
 define double @trig_preop_inf_0() {
 ; CHECK-LABEL: @trig_preop_inf_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0xB43DD63F5F2F8BD
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0)
   ret double %val
@@ -5550,8 +5545,7 @@
 
 define double @trig_preop_ninf_0() {
 ; CHECK-LABEL: @trig_preop_ninf_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0xB43DD63F5F2F8BD
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0)
   ret double %val
@@ -5577,8 +5571,7 @@
 
 define double @trig_preop_constfold() {
 ; CHECK-LABEL: @trig_preop_constfold(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x2F42371D2126E970
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5)
   ret double %val
@@ -5595,8 +5588,7 @@
 
 define double @trig_preop_constfold_0.0__0() {
 ; CHECK-LABEL: @trig_preop_constfold_0.0__0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 0)
   ret double %val
@@ -5604,8 +5596,7 @@
 
 define double @trig_preop_constfold_0.0__1() {
 ; CHECK-LABEL: @trig_preop_constfold_0.0__1(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 1)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3C94A7F09D5F47D4
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 1)
   ret double %val
@@ -5613,8 +5604,7 @@
 
 define double @trig_preop_constfold_0.0__neg1() {
 ; CHECK-LABEL: @trig_preop_constfold_0.0__neg1(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 -1)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0.000000e+00
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 -1)
   ret double %val
@@ -5622,8 +5612,7 @@
 
 define double @trig_preop_constfold_0.0__9999999() {
 ; CHECK-LABEL: @trig_preop_constfold_0.0__9999999(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 9999999)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0.000000e+00
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 9999999)
   ret double %val
@@ -5631,8 +5620,7 @@
 
 define double @trig_preop_constfold_0.0__neg999999() {
 ; CHECK-LABEL: @trig_preop_constfold_0.0__neg999999(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 -999999)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3C94A7F09D5F47D4
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 -999999)
   ret double %val
@@ -5640,8 +5628,7 @@
 
 define double @trig_preop_constfold_0x0020000000000000_0() {
 ; CHECK-LABEL: @trig_preop_constfold_0x0020000000000000_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x10000000000000, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x0010000000000000, i32 0)
   ret double %val
@@ -5649,8 +5636,7 @@
 
 define double @trig_preop_constfold_0x001fffffffffffff_0() {
 ; CHECK-LABEL: @trig_preop_constfold_0x001fffffffffffff_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0xFFFFFFFFFFFFF, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x000fffffffffffff, i32 0)
   ret double %val
@@ -5658,8 +5644,7 @@
 
 define double @trig_preop_constfold_0x8020000000000000_0() {
 ; CHECK-LABEL: @trig_preop_constfold_0x8020000000000000_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x8020000000000000, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x8020000000000000, i32 0)
   ret double %val
@@ -5667,8 +5652,7 @@
 
 define double @trig_preop_constfold_0x801fffffffffffff_0() {
 ; CHECK-LABEL: @trig_preop_constfold_0x801fffffffffffff_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x801FFFFFFFFFFFFF, i32 0)
-; CHECK-NEXT:    ret double [[VAL]]
+; CHECK-NEXT:    ret double 0x3FE45F306DC9C882
 ;
   %val = call double @llvm.amdgcn.trig.preop.f64(double 0x801fffffffffffff, i32 0)
   ret double %val