Index: llvm/lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- llvm/lib/Target/PowerPC/PPCCallingConv.td +++ llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -277,13 +277,19 @@ F27, F28, F29, F30, F31 )>; def CSR_SPE : CalleeSavedRegs<(add S14, S15, S16, S17, S18, S19, S20, S21, S22, - S23, S24, S25, S26, S27, S28, S29, S30, S31 + S23, S24, S25, S26, S27, S28, S29, S30 )>; +def CSR_SPE_NO_S30_31 : CalleeSavedRegs<(add S14, S15, S16, S17, S18, S19, S20, S21, + S22, S23, S24, S25, S26, S27, S28, S29 + )>; + def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>; def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>; +def CSR_SVR432_SPE_NO_S30_31 : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE_NO_S30_31)>; + def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, F14, F15, F16, F17, F18, Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -253,8 +253,11 @@ return CSR_SVR432_VSRP_SaveList; if (Subtarget.hasAltivec()) return CSR_SVR432_Altivec_SaveList; - else if (Subtarget.hasSPE()) + else if (Subtarget.hasSPE()) { + if (TM.isPositionIndependent() && !TM.isPPC64()) + return CSR_SVR432_SPE_NO_S30_31_SaveList; return CSR_SVR432_SPE_SaveList; + } return CSR_SVR432_SaveList; } @@ -313,8 +316,11 @@ ? CSR_SVR432_VSRP_RegMask : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask - : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask - : CSR_SVR432_RegMask)); + : (Subtarget.hasSPE() + ? (TM.isPositionIndependent() + ? 
CSR_SVR432_SPE_NO_S30_31_RegMask
+                                         : CSR_SVR432_SPE_RegMask)
+                                  : CSR_SVR432_RegMask));
 }
 
 const uint32_t*
Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -365,10 +366,72 @@
   return false;
 }
 
+/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
+/// C2 saturate the value of the fp conversion. The transform is not reversible
+/// as the fptosi.sat is more defined than the input - all values produce a
+/// valid value for the fptosi.sat, whereas some produce poison for the original
+/// that were out of range of the integer conversion. The reversed pattern may
+/// use fmax and fmin instead. As we cannot directly reverse the transform, and
+/// it is not always profitable, we make it conditional on the cost being
+/// reported as lower by TTI.
+static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
+  // Look for smin/smax of a one-use fptosi, in either nesting order.
+  Value *In;
+  const APInt *MinC, *MaxC;
+  if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
+                                        m_APInt(MinC))),
+                        m_APInt(MaxC))) &&
+      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
+                                        m_APInt(MaxC))),
+                        m_APInt(MinC))))
+    return false;
+
+  // MinC must be 2^(N-1)-1 and MaxC -2^(N-1): a signed N-bit saturation range.
+  if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
+    return false;
+
+  Type *IntTy = I.getType();
+  Type *FpTy = In->getType();
+  Type *SatTy =
+      IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
+  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
+    SatTy = VectorType::get(SatTy, VecTy->getElementCount());
+
+  // Only transform when fptosi.sat plus the sext back to IntTy is reported as
+  // strictly cheaper than the original fptosi+smin+smax sequence.
+  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
+      TTI::TCK_RecipThroughput);
+  SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
+                                  TTI::CastContextHint::None,
+                                  TTI::TCK_RecipThroughput);
+
+  InstructionCost MinMaxCost = TTI.getCastInstrCost(
+      Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
+      TTI::TCK_RecipThroughput);
+  MinMaxCost += TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
+      TTI::TCK_RecipThroughput);
+  MinMaxCost += TTI.getIntrinsicInstrCost(
+      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
+      TTI::TCK_RecipThroughput);
+
+  if (SatCost >= MinMaxCost)
+    return false;
+
+  IRBuilder<> Builder(&I);
+  Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
+                                           {SatTy, FpTy});
+  Value *Sat = Builder.CreateCall(Fn, In);
+  I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
-static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+                                TargetTransformInfo &TTI) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -384,6 +447,7 @@ MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedFunnelShift(I, DT); MadeChange |= tryToRecognizePopCount(I); + MadeChange |= tryToFPToSat(I, TTI); } } @@ -397,13 +461,13 @@ /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. -static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT) { +static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI, DominatorTree &DT) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT); + MadeChange |= foldUnusualPatterns(F, DT, TTI); return MadeChange; } @@ -413,6 +477,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -423,7 +488,8 @@ auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); - return runImpl(F, AC, TLI, DT); + auto &TTI = getAnalysis().getTTI(F); + return runImpl(F, AC, TTI, TLI, DT); } PreservedAnalyses AggressiveInstCombinePass::run(Function &F, @@ -431,7 +497,8 @@ auto &AC = AM.getResult(F); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); - if (!runImpl(F, AC, TLI, DT)) { + auto &TTI = AM.getResult(F); + if (!runImpl(F, AC, TTI, TLI, DT)) { // No changes, all analyses are preserved. 
return PreservedAnalyses::all(); } @@ -448,6 +515,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine", "Combine pattern based expressions", false, false) Index: llvm/test/CodeGen/PowerPC/pr55857.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/pr55857.ll @@ -0,0 +1,18 @@ +; RUN: llc --relocation-model=pic \ +; RUN: -mtriple=ppc32 < %s | FileCheck %s + +@g = global i32 10, align 4 + +; Function Attrs: noinline nounwind optnone uwtable +define i32 @main() #0 { +; CHECK-LABEL: main: +; CHECK-NOT: evstdd +entry: + %retval = alloca i32, align 4 + store i32 0, ptr %retval, align 4 + %0 = load i32, ptr @g, align 4 + ret i32 %0 +} + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="e500" "target-features"="+spe,-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-vsx" } + Index: llvm/test/CodeGen/PowerPC/spe.ll =================================================================== --- llvm/test/CodeGen/PowerPC/spe.ll +++ llvm/test/CodeGen/PowerPC/spe.ll @@ -1639,14 +1639,13 @@ ; SPE: # %bb.0: # %entry ; SPE-NEXT: mflr 0 ; SPE-NEXT: stw 0, 4(1) -; SPE-NEXT: stwu 1, -272(1) +; SPE-NEXT: stwu 1, -288(1) ; SPE-NEXT: li 5, 256 -; SPE-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill -; SPE-NEXT: li 5, 264 -; SPE-NEXT: evstddx 31, 1, 5 # 8-byte Folded Spill -; SPE-NEXT: li 5, .LCPI55_0@l +; SPE-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill +; SPE-NEXT: li 5, .LCPI55_0@ ; SPE-NEXT: lis 6, .LCPI55_0@ha ; 
SPE-NEXT: evlddx 5, 6, 5 +; SPE-NEXT: stw 31, 284(1) # 4-byte Folded Spill ; SPE-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill @@ -1664,7 +1663,7 @@ ; SPE-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: lwz 4, 280(1) +; SPE-NEXT: lwz 4, 296(1) ; SPE-NEXT: efdadd 3, 3, 3 ; SPE-NEXT: efdadd 3, 3, 5 ; SPE-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill @@ -1686,13 +1685,11 @@ ; SPE-NEXT: li 6, 1 ; SPE-NEXT: bl test_memset ; SPE-NEXT: evldd 4, 24(1) # 8-byte Folded Reload -; SPE-NEXT: li 5, 264 -; SPE-NEXT: evmergehi 3, 4, 4 -; SPE-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; SPE-NEXT: li 5, 256 -; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: evmergehi 3, 4, 4 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1709,8 +1706,9 @@ ; SPE-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; SPE-NEXT: lwz 0, 276(1) -; SPE-NEXT: addi 1, 1, 272 +; SPE-NEXT: lwz 31, 284(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 292(1) +; SPE-NEXT: addi 1, 1, 288 ; SPE-NEXT: mtlr 0 ; SPE-NEXT: blr ; Index: llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll +++ llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll @@ -1,13 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple 
aarch64-none-eabi -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s +; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP +; RUN: opt < %s -passes=aggressive-instcombine -mtriple aarch64-none-eabi -mattr=+fullfp16 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 define i64 @f32_i32(float %in) { ; CHECK-LABEL: @f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -56,10 +55,9 @@ define i64 @f64_i32(double %in) { ; CHECK-LABEL: @f64_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -94,11 +92,16 @@ } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-FP-LABEL: @f16_i32( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi 
half [[IN:%.*]] to i64 +; CHECK-FP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-FP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-FP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP16-LABEL: @f16_i32( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP16-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -147,10 +150,9 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { ; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,10 +162,9 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { ; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,10 +174,9 @@ define <8 x i64> 
@v8f32_i32(<8 x float> %in) { ; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -185,11 +185,16 @@ } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-FP-LABEL: @v4f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; CHECK-FP-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-FP-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v4f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP16-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -198,11 +203,16 @@ } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x 
i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-FP-LABEL: @v8f16_i16( +; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-FP-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-FP-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-FP-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-FP16-LABEL: @v8f16_i16( +; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP16-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -256,10 +266,9 @@ define i64 @f32_i32_maxmin(float %in) { ; CHECK-LABEL: @f32_i32_maxmin( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[CONV]], i64 -2147483648) -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[MAX]], i64 2147483647) -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %max = call i64 @llvm.smax.i64(i64 %conv, i64 -2147483648) Index: llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll +++ llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll @@ -1,14 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple 
thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s -; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -S | FileCheck %s --check-prefixes=CHECK,CHECK-BASE +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp -S | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP +; RUN: opt < %s -passes=aggressive-instcombine -mtriple thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 -S | FileCheck %s --check-prefixes=CHECK,CHECK-FP64 define i64 @f32_i32(float %in) { -; CHECK-LABEL: @f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -17,11 +27,21 @@ } define i64 @f32_i31(float %in) { -; CHECK-LABEL: @f32_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 
1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f32_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi float [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f32_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f32_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f32(float [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi float %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -56,11 +76,22 @@ } define i64 @f64_i32(double %in) { -; CHECK-LABEL: @f64_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f64_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f64_i32( +; CHECK-MVEFP-NEXT: [[CONV:%.*]] = fptosi double [[IN:%.*]] to i64 +; CHECK-MVEFP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-MVEFP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-MVEFP-NEXT: ret i64 [[MAX]] +; +; CHECK-FP64-LABEL: @f64_i32( +; CHECK-FP64-NEXT: 
[[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f64(double [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi double %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -95,11 +126,21 @@ } define i64 @f16_i32(half %in) { -; CHECK-LABEL: @f16_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -108,11 +149,21 @@ } define i64 @f16_i31(half %in) { -; CHECK-LABEL: @f16_i31( -; CHECK-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) -; CHECK-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -1073741824) -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-BASE-LABEL: @f16_i31( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 +; CHECK-BASE-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 1073741823) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call i64 
@llvm.smax.i64(i64 [[MIN]], i64 -1073741824) +; CHECK-BASE-NEXT: ret i64 [[MAX]] +; +; CHECK-MVEFP-LABEL: @f16_i31( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-MVEFP-NEXT: ret i64 [[TMP2]] +; +; CHECK-FP64-LABEL: @f16_i31( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call i31 @llvm.fptosi.sat.i31.f16(half [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext i31 [[TMP1]] to i64 +; CHECK-FP64-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 1073741823) @@ -147,11 +198,21 @@ } define <2 x i64> @v2f32_i32(<2 x float> %in) { -; CHECK-LABEL: @v2f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v2f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) +; CHECK-BASE-NEXT: ret <2 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v2f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-MVEFP-NEXT: ret <2 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v2f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-FP64-NEXT: ret <2 x i64> [[TMP2]] ; %conv = fptosi <2 x float> %in to <2 x i64> %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -160,11 +221,21 @@ 
} define <4 x i64> @v4f32_i32(<4 x float> %in) { -; CHECK-LABEL: @v4f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) -; CHECK-NEXT: ret <4 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v4f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) +; CHECK-BASE-NEXT: ret <4 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-MVEFP-NEXT: ret <4 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> +; CHECK-FP64-NEXT: ret <4 x i64> [[TMP2]] ; %conv = fptosi <4 x float> %in to <4 x i64> %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -173,11 +244,21 @@ } define <8 x i64> @v8f32_i32(<8 x float> %in) { -; CHECK-LABEL: @v8f32_i32( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) -; CHECK-NEXT: ret <8 x i64> [[MAX]] +; CHECK-BASE-LABEL: @v8f32_i32( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> 
@llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) +; CHECK-BASE-NEXT: ret <8 x i64> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f32_i32( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-MVEFP-NEXT: ret <8 x i64> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f32_i32( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-FP64-NEXT: ret <8 x i64> [[TMP2]] ; %conv = fptosi <8 x float> %in to <8 x i64> %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %conv, <8 x i64> ) @@ -186,11 +267,21 @@ } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-LABEL: @v4f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-NEXT: ret <4 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v4f16_i16( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-BASE-NEXT: ret <4 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v4f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-MVEFP-NEXT: ret <4 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v4f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-FP64-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to 
<4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -199,11 +290,21 @@ } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-LABEL: @v8f16_i16( -; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-NEXT: ret <8 x i32> [[MAX]] +; CHECK-BASE-LABEL: @v8f16_i16( +; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-BASE-NEXT: ret <8 x i32> [[MAX]] +; +; CHECK-MVEFP-LABEL: @v8f16_i16( +; CHECK-MVEFP-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-MVEFP-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-MVEFP-NEXT: ret <8 x i32> [[TMP2]] +; +; CHECK-FP64-LABEL: @v8f16_i16( +; CHECK-FP64-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-FP64-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-FP64-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> )