diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -365,10 +366,72 @@ return false; } +/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and +/// C2 saturate the value of the fp conversion. The transform is not reversible +/// as the fptosi.sat is more defined than the input - all values produce a +/// valid value for the fptosi.sat, whereas some produce poison for the original +/// when out of range of the integer conversion. The reversed pattern may +/// use fmax and fmin instead. As we cannot directly reverse the transform, and +/// it is not always profitable, we make it conditional on the cost being +/// reported as lower by TTI. +static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { + // Look for min(max(fptosi)), converting to fptosi_sat. + Value *In; + const APInt *MinC, *MaxC; + if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))), + m_APInt(MinC))), + m_APInt(MaxC))) && + !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))), + m_APInt(MaxC))), + m_APInt(MinC)))) + return false; + + // Check that the constants clamp to a saturating range. 
+ if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1) + return false; + + Type *IntTy = I.getType(); + Type *FpTy = In->getType(); + Type *SatTy = + IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1); + if (auto *VecTy = dyn_cast(IntTy)) + SatTy = VectorType::get(SatTy, VecTy->getElementCount()); + + // Get the cost of the intrinsic, and check that against the cost of + // fptosi+smin+smax. + InstructionCost SatCost = TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}), + TTI::TCK_RecipThroughput); + SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy, + TTI::CastContextHint::None, + TTI::TCK_RecipThroughput); + + InstructionCost MinMaxCost = TTI.getCastInstrCost( + Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None, + TTI::TCK_RecipThroughput); + MinMaxCost += TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}), + TTI::TCK_RecipThroughput); + MinMaxCost += TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}), + TTI::TCK_RecipThroughput); + + if (SatCost >= MinMaxCost) + return false; + + IRBuilder<> Builder(&I); + Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat, + {SatTy, FpTy}); + Value *Sat = Builder.CreateCall(Fn, In); + I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy)); + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. -static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { +static bool foldUnusualPatterns(Function &F, DominatorTree &DT, + TargetTransformInfo &TTI) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. 
@@ -384,6 +447,7 @@ MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); + MadeChange |= tryToFPToSat(I, TTI); } } @@ -397,13 +461,13 @@ /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. -static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT) { +static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI, DominatorTree &DT) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT); + MadeChange |= foldUnusualPatterns(F, DT, TTI); return MadeChange; } @@ -413,6 +477,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -423,7 +488,8 @@ auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); - return runImpl(F, AC, TLI, DT); + auto &TTI = getAnalysis().getTTI(F); + return runImpl(F, AC, TTI, TLI, DT); } PreservedAnalyses AggressiveInstCombinePass::run(Function &F, @@ -431,7 +497,8 @@ auto &AC = AM.getResult(F); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); - if (!runImpl(F, AC, TLI, DT)) { + auto &TTI = AM.getResult(F); + if (!runImpl(F, AC, TTI, TLI, DT)) { // No changes, all analyses are preserved. 
return PreservedAnalyses::all(); } @@ -448,6 +515,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine", "Combine pattern based expressions", false, false) diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll @@ -1,13 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s +; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP +; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 define i64 @f32_i32(float %in) { ; CHECK-LABEL: @f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -56,10 +55,9 @@ define i64 @f64_i32(double %in) { ; CHECK-LABEL: @f64_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = 
call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -94,11 +92,16 @@ } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-FP-LABEL: @f16_i32( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-FP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-FP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-FP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP16-LABEL: @f16_i32( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP16-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -147,10 +150,9 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { ; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in 
to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,10 +162,9 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { ; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,10 +174,9 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { ; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -185,11 +185,16 @@ } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-FP-LABEL: @v4f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; 
CHECK-FP-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-FP-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v4f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP16-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -198,11 +203,16 @@ } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-FP-LABEL: @v8f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-FP-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-FP-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v8f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP16-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -256,10 +266,9 @@ define i64 @f32_i32_maxmin(float %in) { ; CHECK-LABEL: @f32_i32_maxmin( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[CONV]], i64 -2147483648) -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[MAX]], i64 
2147483647) -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %max = call i64 @llvm.smax.i64(i64 %conv, i64 -2147483648) diff --git a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll --- a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll @@ -1,14 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-BASE +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP64 define i64 @f32_i32(float %in) { -; CHECK-LABEL: @f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 
@llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -17,11 +27,21 @@ } define i64 @f32_i31(float %in) { -; CHECK-LABEL: @f32_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -56,11 +76,22 @@ } define i64 @f64_i32(double %in) { -; CHECK-LABEL: @f64_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 
[[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f64_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f64_i32( +; CHECK-MVEFP-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-MVEFP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-MVEFP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-MVEFP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP64-LABEL: @f64_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -95,11 +126,21 @@ } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i32( +; 
CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -108,11 +149,21 @@ } define i64 @f16_i31(half %in) { -; CHECK-LABEL: @f16_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -147,11 +198,21 @@ } define <2 x i64> @v2f32_i32(<2 x float> %in) { -; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v2f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x 
i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) +; CHECK-BASE-NEXT: ret <2 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v2f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-MVEFP-NEXT: ret <2 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v2f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-FP64-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,11 +221,21 @@ } define <4 x i64> @v4f32_i32(<4 x float> %in) { -; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v4f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) +; CHECK-BASE-NEXT: ret <4 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-MVEFP-NEXT: ret <4 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext 
<4 x i32> [[TMP1]] to <4 x i64> +; CHECK-FP64-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,11 +244,21 @@ } define <8 x i64> @v8f32_i32(<8 x float> %in) { -; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v8f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) +; CHECK-BASE-NEXT: ret <8 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-MVEFP-NEXT: ret <8 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-FP64-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -186,11 +267,21 @@ } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v4f16_i16( +; 
CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-BASE-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-MVEFP-NEXT: ret <4 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP64-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -199,11 +290,21 @@ } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v8f16_i16( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-BASE-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-MVEFP-NEXT: ret <8 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] 
= call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP64-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> )