Index: llvm/include/llvm/Analysis/VectorUtils.h
===================================================================
--- llvm/include/llvm/Analysis/VectorUtils.h
+++ llvm/include/llvm/Analysis/VectorUtils.h
@@ -315,10 +315,9 @@
 /// Identifies if the vector form of the intrinsic has a scalar operand.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
 
-/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// Identifies if the vector form of the intrinsic has an operand that has
 /// an overloaded type.
-bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
-                                            unsigned ScalarOpdIdx);
+bool hasVectorInstrinsicOverloadedOpd(Intrinsic::ID ID, unsigned OpdIdx);
 
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
Index: llvm/lib/Analysis/VectorUtils.cpp
===================================================================
--- llvm/lib/Analysis/VectorUtils.cpp
+++ llvm/lib/Analysis/VectorUtils.cpp
@@ -89,6 +89,8 @@
   case Intrinsic::fmuladd:
   case Intrinsic::powi:
   case Intrinsic::canonicalize:
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
     return true;
   default:
     return false;
@@ -114,11 +116,13 @@
   }
 }
 
-bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
-                                                  unsigned ScalarOpdIdx) {
+bool llvm::hasVectorInstrinsicOverloadedOpd(Intrinsic::ID ID, unsigned OpdIdx) {
   switch (ID) {
+  case Intrinsic::fptosi_sat:
+  case Intrinsic::fptoui_sat:
+    return OpdIdx == 0;
   case Intrinsic::powi:
-    return (ScalarOpdIdx == 1);
+    return OpdIdx == 1;
   default:
     return false;
   }
Index: llvm/lib/Transforms/Scalar/Scalarizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -575,9 +575,11 @@
     if (OpI->getType()->isVectorTy()) {
       Scattered[I] = scatter(&CI, OpI);
       assert(Scattered[I].size() == NumElems && "mismatched call operands");
+      if (hasVectorInstrinsicOverloadedOpd(ID, I))
+        Tys.push_back(OpI->getType()->getScalarType());
     } else {
       ScalarOperands[I] = OpI;
-      if (hasVectorInstrinsicOverloadedScalarOpd(ID, I))
+      if (hasVectorInstrinsicOverloadedOpd(ID, I))
         Tys.push_back(OpI->getType());
     }
   }
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4242,11 +4242,10 @@
       Value *Arg;
       if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index()))
        Arg = State.get(I.value(), Part);
-      else {
+      else
        Arg = State.get(I.value(), VPIteration(0, 0));
-        if (hasVectorInstrinsicOverloadedScalarOpd(ID, I.index()))
-          TysForDecl.push_back(Arg->getType());
-      }
+      if (hasVectorInstrinsicOverloadedOpd(ID, I.index()))
+        TysForDecl.push_back(Arg->getType());
       Args.push_back(Arg);
     }
 
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7459,7 +7459,7 @@
          CallInst *CEI = cast<CallInst>(VL0);
          ScalarArg = CEI->getArgOperand(j);
          OpVecs.push_back(CEI->getArgOperand(j));
-          if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
+          if (hasVectorInstrinsicOverloadedOpd(IID, j))
            TysForDecl.push_back(ScalarArg->getType());
          continue;
        }
@@ -7467,6 +7467,8 @@
        Value *OpVec = vectorizeTree(E->getOperand(j));
        LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
        OpVecs.push_back(OpVec);
+        if (hasVectorInstrinsicOverloadedOpd(IID, j))
+          TysForDecl.push_back(OpVec->getType());
      }
 
      Function *CF;
Index: llvm/test/Transforms/LoopVectorize/fpsat.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/fpsat.ll
+++ llvm/test/Transforms/LoopVectorize/fpsat.ll
@@ -8,21 +8,53 @@
 ; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[TMP0]])
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[TMP8]])
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ;
 entry:
   %cmp6 = icmp sgt i32 %n, 0
@@ -54,21 +86,53 @@
 ; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !8
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !11, !noalias !8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[TMP0]])
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[TMP8]])
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ;
 entry:
   %cmp6 = icmp sgt i32 %n, 0
Index: llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
@@ -6,24 +6,9 @@
 define void @signed(ptr %x, ptr %y, i32 %n) {
 ; CHECK-LABEL: @signed(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
-; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
-; CHECK-NEXT:    [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
-; CHECK-NEXT:    [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[L1:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L0]])
-; CHECK-NEXT:    [[L3:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L2]])
-; CHECK-NEXT:    [[L5:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L4]])
-; CHECK-NEXT:    [[L7:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L6]])
-; CHECK-NEXT:    store i32 [[L1]], ptr [[Y:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
-; CHECK-NEXT:    store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
-; CHECK-NEXT:    store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
-; CHECK-NEXT:    store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -51,24 +36,9 @@
 define void @unsigned(ptr %x, ptr %y, i32 %n) {
 ; CHECK-LABEL: @unsigned(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
-; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
-; CHECK-NEXT:    [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
-; CHECK-NEXT:    [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[L1:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L0]])
-; CHECK-NEXT:    [[L3:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L2]])
-; CHECK-NEXT:    [[L5:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L4]])
-; CHECK-NEXT:    [[L7:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L6]])
-; CHECK-NEXT:    store i32 [[L1]], ptr [[Y:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
-; CHECK-NEXT:    store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
-; CHECK-NEXT:    store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
-; CHECK-NEXT:    store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
Index: llvm/test/Transforms/Scalarizer/intrinsics.ll
===================================================================
--- llvm/test/Transforms/Scalarizer/intrinsics.ll
+++ llvm/test/Transforms/Scalarizer/intrinsics.ll
@@ -24,6 +24,9 @@
 
 declare <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32)
 declare <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32)
 
+declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
+
 ; CHECK-LABEL: @scalarize_sqrt_v2f32(
 ; CHECK: %sqrt.i0 = call float @llvm.sqrt.f32(float %x.i0)
@@ -134,3 +137,25 @@
   %umulfixsat = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> , i32 31)
   ret <2 x i32> %umulfixsat
 }
+
+; CHECK-LABEL: @scalarize_fptosi_sat(
+; CHECK: %sat.i0 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i0)
+; CHECK: %sat.i1 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i1)
+; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0
+; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1
+; CHECK: ret <2 x i32> %sat
+define <2 x i32> @scalarize_fptosi_sat(<2 x float> %x) #0 {
+  %sat = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %sat
+}
+
+; CHECK-LABEL: @scalarize_fptoui_sat(
+; CHECK: %sat.i0 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i0)
+; CHECK: %sat.i1 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i1)
+; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0
+; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1
+; CHECK: ret <2 x i32> %sat
+define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 {
+  %sat = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %sat
+}