diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3549,6 +3549,55 @@ int ScalarCost = VecTy->getNumElements() * ScalarEltCost; int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, CostKind, VL0); + // Check if the select can be folded into a min/max intrinsic and check if + // that is a cheaper alternative. Lowering to the actual intrinsics/target + // instructions is not done by SLPVectorizer and needs to be done by the + // backend. + // TODO: Support floating point min/max. + bool AllCmpSingleUse = true; + SelectPatternResult SelectPattern; + SelectPattern.Flavor = SPF_UNKNOWN; + if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { + Value *LHS, *RHS; + auto CurrentPattern = matchSelectPattern(I, LHS, RHS); + if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) || + CurrentPattern.Flavor == SPF_FMINNUM || + CurrentPattern.Flavor == SPF_FMAXNUM) + return false; + if (SelectPattern.Flavor != SPF_UNKNOWN && + SelectPattern.Flavor != CurrentPattern.Flavor) + return false; + SelectPattern = CurrentPattern; + AllCmpSingleUse &= + match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value())); + return true; + })) { + Intrinsic::ID IID = Intrinsic::not_intrinsic; + switch (SelectPattern.Flavor) { + case SPF_SMIN: + IID = Intrinsic::smin; + break; + case SPF_UMIN: + IID = Intrinsic::umin; + break; + case SPF_SMAX: + IID = Intrinsic::smax; + break; + case SPF_UMAX: + IID = Intrinsic::umax; + break; + default: + llvm_unreachable("unexpected select pattern flavor"); + } + IntrinsicCostAttributes CostAttrs(IID, VecTy, {VecTy, VecTy}); + int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind); + // If the selects are the only uses of the compares, they will be dead + // and we can adjust the cost by removing their cost. 
+ if (AllCmpSingleUse) + IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, + MaskTy, CostKind); + VecCost = std::min(VecCost, IntrinsicCost); + } return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::FNeg: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -slp-vectorizer -S %s | FileCheck %s -; RUN: opt -passes='slp-vectorizer' -S %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" @@ -8,45 +8,19 @@ define void @select_umin_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 
16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x 
i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -102,25 +76,15 @@ define void @select_umin_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: 
store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -235,45 +199,19 @@ define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_ule_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp ule i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load 
i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp ule i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp ule i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp ule i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -329,25 +267,15 @@ define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_ule_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 
-; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -410,45 +338,19 @@ define void @select_smin_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] 
= getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp slt i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp slt i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp slt i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp slt i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: 
store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -504,25 +406,15 @@ define void @select_smin_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* 
[[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -585,45 +477,19 @@ define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_sle_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* 
[[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp sle i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp sle i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp sle i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp sle i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -679,25 +545,15 @@ define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_sle_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; 
CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -759,45 +615,19 @@ define void @select_umax_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = 
getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: 
store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -853,25 +683,15 @@ define void @select_umax_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr 
inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -934,45 +754,19 @@ define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_uge_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* 
[[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp uge i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp uge i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp uge i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp uge i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -1028,25 
+822,15 @@ define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_uge_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1109,45 +893,19 @@ define void @select_smax_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: 
@select_smax_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp sgt i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp sgt i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 
[[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp sgt i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp sgt i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -1203,25 +961,15 @@ define void @select_smax_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds 
i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1285,45 +1033,19 @@ define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_sge_8xi16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i16 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383 -; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1 -; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i16 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383 -; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2 -; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* 
[[GEP_2]], align 2 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i16 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383 -; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3 -; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i16 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383 -; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4 -; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2 -; CHECK-NEXT: [[CMP_4:%.*]] = icmp sge i16 [[L_4]], 16383 -; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383 -; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5 -; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2 -; CHECK-NEXT: [[CMP_5:%.*]] = icmp sge i16 [[L_5]], 16383 -; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383 -; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6 -; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2 -; CHECK-NEXT: [[CMP_6:%.*]] = icmp sge i16 [[L_6]], 16383 -; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383 -; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7 -; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2 -; CHECK-NEXT: [[CMP_7:%.*]] = icmp sge i16 [[L_7]], 16383 -; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383 -; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], 
align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -1379,25 +1101,15 @@ define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_sge_4xi32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i32 [[L_0]], 16383 -; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383 -; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1 -; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[L_1]], 16383 -; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383 -; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[L_2]], 16383 -; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383 -; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3 -; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i32 [[L_3]], 16383 -; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383 -; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], +; 
CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; entry: