diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -262,86 +262,74 @@ i8x16 min_s_i8x16(i8x16 x, i8x16 y) { return __builtin_wasm_min_s_i8x16(x, y); - // WEBASSEMBLY: %0 = icmp slt <16 x i8> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y - // WEBASSEMBLY-NEXT: ret <16 x i8> %1 + // WEBASSEMBLY: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret } u8x16 min_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_min_u_i8x16(x, y); - // WEBASSEMBLY: %0 = icmp ult <16 x i8> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y - // WEBASSEMBLY-NEXT: ret <16 x i8> %1 + // WEBASSEMBLY: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret } i8x16 max_s_i8x16(i8x16 x, i8x16 y) { return __builtin_wasm_max_s_i8x16(x, y); - // WEBASSEMBLY: %0 = icmp sgt <16 x i8> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y - // WEBASSEMBLY-NEXT: ret <16 x i8> %1 + // WEBASSEMBLY: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret } u8x16 max_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_max_u_i8x16(x, y); - // WEBASSEMBLY: %0 = icmp ugt <16 x i8> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y - // WEBASSEMBLY-NEXT: ret <16 x i8> %1 + // WEBASSEMBLY: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret } i16x8 min_s_i16x8(i16x8 x, i16x8 y) { return __builtin_wasm_min_s_i16x8(x, y); - // WEBASSEMBLY: %0 = icmp slt <8 x i16> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y - // WEBASSEMBLY-NEXT: ret <8 x i16> %1 + // WEBASSEMBLY: call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret } u16x8 min_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_min_u_i16x8(x, y); - // WEBASSEMBLY: %0 = icmp ult <8 x i16> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y - // WEBASSEMBLY-NEXT: ret <8 x i16> %1 + // WEBASSEMBLY: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret } i16x8 max_s_i16x8(i16x8 x, i16x8 y) { return __builtin_wasm_max_s_i16x8(x, y); - // WEBASSEMBLY: %0 = icmp sgt <8 x i16> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y - // WEBASSEMBLY-NEXT: ret <8 x i16> %1 + // WEBASSEMBLY: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret } u16x8 max_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_max_u_i16x8(x, y); - // WEBASSEMBLY: %0 = icmp ugt <8 x i16> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y - // WEBASSEMBLY-NEXT: ret <8 x i16> %1 + // WEBASSEMBLY: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret } i32x4 min_s_i32x4(i32x4 x, i32x4 y) { return __builtin_wasm_min_s_i32x4(x, y); - // WEBASSEMBLY: %0 = icmp slt <4 x i32> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y - // WEBASSEMBLY-NEXT: ret <4 x i32> %1 + // WEBASSEMBLY: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret } u32x4 min_u_i32x4(u32x4 x, u32x4 y) { return __builtin_wasm_min_u_i32x4(x, y); - // WEBASSEMBLY: %0 = icmp ult <4 x i32> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y - // WEBASSEMBLY-NEXT: ret <4 x i32> %1 + // WEBASSEMBLY: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret } i32x4 max_s_i32x4(i32x4 x, i32x4 y) { return __builtin_wasm_max_s_i32x4(x, y); - // WEBASSEMBLY: %0 = icmp sgt <4 x i32> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y - // WEBASSEMBLY-NEXT: ret <4 x i32> %1 + // WEBASSEMBLY: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret } u32x4 max_u_i32x4(u32x4 x, u32x4 y) { return __builtin_wasm_max_u_i32x4(x, y); - // WEBASSEMBLY: %0 = icmp ugt <4 x i32> %x, %y - // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y - // WEBASSEMBLY-NEXT: ret <4 x i32> %1 + // WEBASSEMBLY: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret } i16x8 sub_sat_s_i16x8(i16x8 x, i16x8 y) { diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1711,10 +1711,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_i8x16_min(v128_t a, v128_t b) { return wasm_i8x16_min(a, b); @@ -1724,10 +1723,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_u8x16_min(v128_t a, v128_t b) { return wasm_u8x16_min(a, b); @@ -1737,10 +1735,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_i8x16_max(v128_t a, v128_t b) { return wasm_i8x16_max(a, b); @@ -1750,10 +1747,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_u8x16_max(v128_t a, v128_t b) { return wasm_u8x16_max(a, b); @@ -1944,10 +1940,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_i16x8_min(v128_t a, v128_t b) { return wasm_i16x8_min(a, b); @@ -1957,10 +1952,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_u16x8_min(v128_t a, v128_t b) { return wasm_u16x8_min(a, b); @@ -1970,10 +1964,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_i16x8_max(v128_t a, v128_t b) { return wasm_i16x8_max(a, b); @@ -1983,10 +1976,9 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP0]], <8 x i16> [[TMP1]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR7]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // v128_t test_u16x8_max(v128_t a, v128_t b) { return wasm_u16x8_max(a, b); @@ -2103,9 +2095,8 @@ // CHECK-LABEL: @test_i32x4_min( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_i32x4_min(v128_t a, v128_t b) { return wasm_i32x4_min(a, b); @@ -2113,9 +2104,8 @@ // CHECK-LABEL: @test_u32x4_min( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_u32x4_min(v128_t a, v128_t b) { return wasm_u32x4_min(a, b); @@ -2123,9 +2113,8 @@ // CHECK-LABEL: @test_i32x4_max( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_i32x4_max(v128_t a, v128_t b) { return wasm_i32x4_max(a, b); @@ -2133,9 +2122,8 @@ // CHECK-LABEL: @test_u32x4_max( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR7]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // v128_t test_u32x4_max(v128_t a, v128_t b) { return wasm_u32x4_max(a, b); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1142,29 +1142,55 @@ return &Sel; } -static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { - if (!Cmp.hasOneUse() || !isa(Cmp.getOperand(1))) - return nullptr; - +static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, + InstCombinerImpl &IC) { Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; - if (SPF != SelectPatternFlavor::SPF_ABS && - SPF != SelectPatternFlavor::SPF_NABS) + // TODO: What to do with pointer min/max patterns? + if (!Sel.getType()->isIntOrIntVectorTy()) return nullptr; - // Note that NSW flag can only be propagated for normal, non-negated abs! - bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && - match(RHS, m_NSWNeg(m_Specific(LHS))); - Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); - Instruction *Abs = - IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); - - if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; + if (SPF == SelectPatternFlavor::SPF_ABS || + SPF == SelectPatternFlavor::SPF_NABS) { + if (!Cmp.hasOneUse()) + return nullptr; // TODO: Relax this restriction. + + // Note that NSW flag can only be propagated for normal, non-negated abs! + bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && + match(RHS, m_NSWNeg(m_Specific(LHS))); + Constant *IntMinIsPoisonC = + ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + Instruction *Abs = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); + + if (SPF == SelectPatternFlavor::SPF_NABS) + return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + return IC.replaceInstUsesWith(Sel, Abs); + } + + if (SelectPatternResult::isMinOrMax(SPF)) { + Intrinsic::ID IntrinsicID; + switch (SPF) { + case SelectPatternFlavor::SPF_UMIN: + IntrinsicID = Intrinsic::umin; + break; + case SelectPatternFlavor::SPF_UMAX: + IntrinsicID = Intrinsic::umax; + break; + case SelectPatternFlavor::SPF_SMIN: + IntrinsicID = Intrinsic::smin; + break; + case SelectPatternFlavor::SPF_SMAX: + IntrinsicID = Intrinsic::smax; + break; + default: + llvm_unreachable("Unexpected SPF"); + } + return IC.replaceInstUsesWith( + Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + } - return IC.replaceInstUsesWith(Sel, Abs); + return nullptr; } /// If we have a select with an equality comparison, then we know the value in @@ -1540,8 +1566,8 @@ if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) return NewSel; - if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this)) - return NewAbs; + if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) + return NewSPF; if (Value *V = canonicalizeClampLike(SI, *ICI, Builder)) return replaceInstUsesWith(SI, V); diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll @@ -198,8 +198,7 @@ ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() ; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8 ; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x -; CHECK: %cmp = icmp slt i32 %sub, 8 -; CHECK: %select = select i1 %cmp, i32 %sub, i32 8 +; CHECK: %1 = call i32 @llvm.smin.i32(i32 %sub, i32 8) define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 @@ -222,9 +221,8 @@ ; CHECK-LABEL: @local_size_x_8_16_2_wrong_select( ; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8 ; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x -; CHECK: %1 = icmp ugt i32 %sub, 8 -; CHECK: %select = select i1 %1, i32 %sub, i32 8 -; CHECK: %zext = zext i32 %select to i64 +; CHECK: %1 = call i32 @llvm.umax.i32(i32 %sub, i32 8) +; CHECK: %zext = zext i32 %1 to i64 define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 @@ -472,8 +470,7 @@ } ; CHECK-LABEL: @use_local_size_x_uniform_work_group_size_false( -; CHECK: icmp ult -; CHECK: select +; CHECK: call i32 @llvm.umin define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(i64 addrspace(1)* %out) #3 { %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll --- a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll @@ -11,11 +11,11 @@ ; CHECK-NEXT: store i32* [[ARRAY:%.*]], i32** [[ARRAY_SPILL_ADDR]], align 8 ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[ARRAY]], align 4 ; CHECK-NEXT: [[LOAD_POS:%.*]] = icmp sgt i32 [[LOAD]], 0 -; CHECK-NEXT: [[SPEC_SELECT4:%.*]] = select i1 [[LOAD_POS]], i32 [[LOAD]], i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*) -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32 } [[TMP1]], i32 [[SPEC_SELECT4]], 1 -; CHECK-NEXT: ret { i8*, i32 } [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.smax.i32(i32 [[LOAD]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*) +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i8*, i32 } [[TMP2]], i32 [[TMP0]], 1 +; CHECK-NEXT: ret { i8*, i32 } [[TMP3]] ; entry: %id = call token @llvm.coro.id.retcon.once(i32 8, i32 8, i8* %buffer, i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*)) @@ -45,9 +45,6 @@ unreachable } - - - define void @test(i32* %array) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: @@ -56,8 +53,8 @@ ; CHECK-NEXT: store i32* [[ARRAY:%.*]], i32** [[TMP0]], align 8 ; CHECK-NEXT: [[LOAD_I:%.*]] = load i32, i32* [[ARRAY]], align 4 ; CHECK-NEXT: [[LOAD_POS_I:%.*]] = icmp sgt i32 [[LOAD_I]], 0 -; CHECK-NEXT: [[SPEC_SELECT4_I:%.*]] = select i1 [[LOAD_POS_I]], i32 [[LOAD_I]], i32 0 -; CHECK-NEXT: call void @print(i32 [[SPEC_SELECT4_I]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[LOAD_I]], i32 0) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: call void @print(i32 [[TMP1]]) ; CHECK-NEXT: [[CONT_CAST:%.*]] = select i1 [[LOAD_POS_I]], void (i8*, i1)* @f.resume.0, void (i8*, i1)* @f.resume.1 ; CHECK-NEXT: call void [[CONT_CAST]](i8* nonnull [[DOTSUB]], i1 zeroext false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll b/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll --- a/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll +++ b/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll @@ -3,10 +3,10 @@ define i32 @foo(i32 %a) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -100 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], -1 -; CHECK-NEXT: [[A_OP:%.*]] = add i32 [[A]], 1 -; CHECK-NEXT: [[T13:%.*]] = select i1 [[TMP2]], i32 100, i32 [[A_OP]] +; CHECK-NEXT: [[T15:%.*]] = sub i32 99, [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[T15]], i32 0) +; CHECK-NEXT: [[T12:%.*]] = add i32 [[TMP1]], [[A]] +; CHECK-NEXT: [[T13:%.*]] = add i32 [[T12]], 1 ; CHECK-NEXT: ret i32 [[T13]] ; %t15 = sub i32 99, %a @@ -19,9 +19,9 @@ define i32 @bar(i32 %a) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -100 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], -1 -; CHECK-NEXT: [[T12:%.*]] = select i1 [[TMP2]], i32 99, i32 [[A]] +; CHECK-NEXT: [[T15:%.*]] = sub i32 99, [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[T15]], i32 0) +; CHECK-NEXT: [[T12:%.*]] = add i32 [[TMP1]], [[A]] ; CHECK-NEXT: ret i32 [[T12]] ; %t15 = sub i32 99, %a diff --git a/llvm/test/Transforms/InstCombine/CPP_min_max.ll b/llvm/test/Transforms/InstCombine/CPP_min_max.ll --- a/llvm/test/Transforms/InstCombine/CPP_min_max.ll +++ b/llvm/test/Transforms/InstCombine/CPP_min_max.ll @@ -13,9 +13,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP_1_I:%.*]] = load i32, i32* [[Y:%.*]], align 4 ; CHECK-NEXT: [[TMP_3_I:%.*]] = load i32, i32* [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP_4_I:%.*]] = icmp slt i32 [[TMP_1_I]], [[TMP_3_I]] -; CHECK-NEXT: [[TMP_4:%.*]] = select i1 [[TMP_4_I]], i32 [[TMP_1_I]], i32 [[TMP_3_I]] -; CHECK-NEXT: store i32 [[TMP_4]], i32* [[X]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP_1_I]], i32 [[TMP_3_I]]) +; CHECK-NEXT: store i32 [[TMP0]], i32* [[X]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -33,9 +32,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP_2:%.*]] = load i32, i32* [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP_3_I:%.*]] = load i32, i32* [[Y:%.*]], align 4 -; CHECK-NEXT: [[TMP_4_I:%.*]] = icmp slt i32 [[TMP_2]], [[TMP_3_I]] -; CHECK-NEXT: [[TMP_6:%.*]] = select i1 [[TMP_4_I]], i32 [[TMP_3_I]], i32 [[TMP_2]] -; CHECK-NEXT: store i32 [[TMP_6]], i32* [[Y]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP_2]], i32 [[TMP_3_I]]) +; CHECK-NEXT: store i32 [[TMP0]], i32* [[Y]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -363,9 +363,8 @@ ; have produced all 1s. We partially optimize this. define i8 @shifty_abs_commute0_nuw(i8 %x) { ; CHECK-LABEL: @shifty_abs_commute0_nuw( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 0 -; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0 -; CHECK-NEXT: ret i8 [[ABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 0) +; CHECK-NEXT: ret i8 [[TMP1]] ; %signbit = ashr i8 %x, 7 %add = add nuw i8 %signbit, %x @@ -457,9 +456,8 @@ define <4 x i32> @shifty_sub_nuw_vec_commute(<4 x i32> %x) { ; CHECK-LABEL: @shifty_sub_nuw_vec_commute( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[X:%.*]], <4 x i32> zeroinitializer) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %sh = ashr <4 x i32> %x, %xor = xor <4 x i32> %sh, %x @@ -469,9 +467,8 @@ define i12 @shifty_sub_nsw_nuw(i12 %x) { ; CHECK-LABEL: @shifty_sub_nsw_nuw( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i12 [[X:%.*]], 0 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i12 [[X]], i12 0 -; CHECK-NEXT: ret i12 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i12 @llvm.smax.i12(i12 [[X:%.*]], i12 0) +; CHECK-NEXT: ret i12 [[TMP1]] ; %sh = ashr i12 %x, 11 %xor = xor i12 %x, %sh diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll --- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll +++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll @@ -7,9 +7,8 @@ define i32 @smax1(i32 %n) { ; CHECK-LABEL: @smax1( -; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sgt i32 %n, 0 %m = select i1 %t, i32 %n, i32 0 @@ -20,9 +19,8 @@ define i32 @smin1(i32 %n) { ; CHECK-LABEL: @smin1( -; CHECK-NEXT: [[T:%.*]] = icmp slt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp slt i32 %n, 0 %m = select i1 %t, i32 %n, i32 0 @@ -33,9 +31,8 @@ define i32 @smax2(i32 %n) { ; CHECK-LABEL: @smax2( -; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sge i32 %n, 1 %m = select i1 %t, i32 %n, i32 0 @@ -46,9 +43,8 @@ define i32 @smin2(i32 %n) { ; CHECK-LABEL: @smin2( -; CHECK-NEXT: [[T:%.*]] = icmp slt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sle i32 %n, -1 %m = select i1 %t, i32 %n, i32 0 @@ -59,9 +55,8 @@ define i32 @smax3(i32 %n) { ; CHECK-LABEL: @smax3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sgt i32 %n, -1 %m = select i1 %t, i32 %n, i32 0 @@ -72,9 +67,8 @@ define <2 x i32> @smax3_vec(<2 x i32> %n) { ; CHECK-LABEL: @smax3_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[N:%.*]], zeroinitializer -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> zeroinitializer -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp sgt <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer @@ -85,9 +79,8 @@ define i32 @smin3(i32 %n) { ; CHECK-LABEL: @smin3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp slt i32 %n, 1 %m = select i1 %t, i32 %n, i32 0 @@ -98,9 +91,8 @@ define <2 x i32> @smin3_vec(<2 x i32> %n) { ; CHECK-LABEL: @smin3_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[N:%.*]], zeroinitializer -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> zeroinitializer -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp slt <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer @@ -111,9 +103,8 @@ define i32 @umax3(i32 %n) { ; CHECK-LABEL: @umax3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 5 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 5 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 5) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp ugt i32 %n, 4 %m = select i1 %t, i32 %n, i32 5 @@ -124,9 +115,8 @@ define <2 x i32> @umax3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax3_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[N:%.*]], -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp ugt <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> @@ -137,9 +127,8 @@ define i32 @umin3(i32 %n) { ; CHECK-LABEL: @umin3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[N:%.*]], 6 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 6 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 6) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp ult i32 %n, 7 %m = select i1 %t, i32 %n, i32 6 @@ -150,9 +139,8 @@ define <2 x i32> @umin3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin3_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[N:%.*]], -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp ult <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> @@ -163,9 +151,8 @@ define i32 @smax4(i32 %n) { ; CHECK-LABEL: @smax4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sge i32 %n, 0 %m = select i1 %t, i32 %n, i32 0 @@ -176,9 +163,8 @@ define <2 x i32> @smax4_vec(<2 x i32> %n) { ; CHECK-LABEL: @smax4_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[N:%.*]], zeroinitializer -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> zeroinitializer -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp sge <2 x i32> %n, zeroinitializer %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer @@ -189,9 +175,8 @@ define i32 @smin4(i32 %n) { ; CHECK-LABEL: @smin4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[N:%.*]], 0 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 0 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[N:%.*]], i32 0) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp sle i32 %n, 0 %m = select i1 %t, i32 %n, i32 0 @@ -202,9 +187,8 @@ define <2 x i32> @smin4_vec(<2 x i32> %n) { ; CHECK-LABEL: @smin4_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[N:%.*]], zeroinitializer -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> zeroinitializer -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp sle <2 x i32> %n, zeroinitializer %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer @@ -215,9 +199,8 @@ define i32 @umax4(i32 %n) { ; CHECK-LABEL: @umax4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[N:%.*]], 8 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 8 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 8) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp uge i32 %n, 8 %m = select i1 %t, i32 %n, i32 8 @@ -228,9 +211,8 @@ define <2 x i32> @umax4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax4_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[N:%.*]], -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp uge <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> @@ -241,9 +223,8 @@ define i32 @umin4(i32 %n) { ; CHECK-LABEL: @umin4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[N:%.*]], 9 -; CHECK-NEXT: [[M:%.*]] = select i1 [[TMP1]], i32 [[N]], i32 9 -; CHECK-NEXT: ret i32 [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 9) +; CHECK-NEXT: ret i32 [[TMP1]] ; %t = icmp ule i32 %n, 9 %m = select i1 %t, i32 %n, i32 9 @@ -254,9 +235,8 @@ define <2 x i32> @umin4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin4_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[N:%.*]], -; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[N]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %t = icmp ule <2 x i32> %n, %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> @@ -265,10 +245,9 @@ define i64 @smax_sext(i32 %a) { ; CHECK-LABEL: @smax_sext( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A_EXT]], 0 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 0 -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp sgt i32 %a, -1 @@ -278,10 +257,9 @@ define <2 x i64> @smax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @smax_sext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i64> [[A_EXT]], zeroinitializer -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp sgt <2 x i32> %a, @@ -291,10 +269,9 @@ define i64 @smin_sext(i32 %a) { ; CHECK-LABEL: @smin_sext( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A_EXT]], 0 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 0 -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp slt i32 %a, 1 @@ -304,10 +281,9 @@ define <2 x i64>@smin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @smin_sext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> [[A_EXT]], zeroinitializer -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer -; CHECK-NEXT: ret <2 x i64> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp slt <2 x i32> %a, @@ -317,10 +293,9 @@ define i64 @umax_sext(i32 %a) { ; CHECK-LABEL: @umax_sext( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A_EXT]], 3 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 3 -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp ugt i32 %a, 2 @@ -330,10 +305,9 @@ define <2 x i64> @umax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp ugt <2 x i32> %a, @@ -343,10 +317,9 @@ define i64 @umin_sext(i32 %a) { ; CHECK-LABEL: @umin_sext( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A_EXT]], 2 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 2 -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp ult i32 %a, 3 @@ -356,10 +329,9 @@ define <2 x i64> @umin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp ult <2 x i32> %a, @@ -369,10 +341,9 @@ define i64 @umax_sext2(i32 %a) { ; CHECK-LABEL: @umax_sext2( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 2 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2 -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp ult i32 %a, 3 @@ -382,10 +353,9 @@ define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext2_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp ult <2 x i32> %a, @@ -395,10 +365,9 @@ define i64 @umin_sext2(i32 %a) { ; CHECK-LABEL: @umin_sext2( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 3 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3 -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %cmp = icmp ugt i32 %a, 2 @@ -408,10 +377,9 @@ define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext2_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = sext <2 x i32> %a to <2 x i64> %cmp = icmp ugt <2 x i32> %a, @@ -421,10 +389,9 @@ define i64 @umax_zext(i32 %a) { ; CHECK-LABEL: @umax_zext( -; CHECK-NEXT: [[A_EXT:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A_EXT]], 3 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 3 -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = zext i32 %a to i64 %cmp = icmp ugt i32 %a, 2 @@ -434,10 +401,9 @@ define <2 x i64> @umax_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_zext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = zext <2 x i32> %a to <2 x i64> %cmp = icmp ugt <2 x i32> %a, @@ -447,10 +413,9 @@ define i64 @umin_zext(i32 %a) { ; CHECK-LABEL: @umin_zext( -; CHECK-NEXT: [[A_EXT:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A_EXT]], 2 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 2 -; CHECK-NEXT: ret i64 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A:%.*]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = zext i32 %a to i64 %cmp = icmp ult i32 %a, 3 @@ -460,10 +425,9 @@ define <2 x i64> @umin_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_zext_vec( -; CHECK-NEXT: [[A_EXT:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[A_EXT]], -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[A_EXT]], <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %a_ext = zext <2 x i32> %a to <2 x i64> %cmp = icmp ult <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll @@ -7,11 +7,9 @@ define i32 @t0_select_cond_and_v0(i32 %X) { ; CHECK-LABEL: @t0_select_cond_and_v0( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 @@ -23,11 +21,9 @@ define i32 @t0_select_cond_and_v0_logical(i32 %X) { ; CHECK-LABEL: @t0_select_cond_and_v0_logical( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 @@ -38,11 +34,9 @@ } define i32 @t1_select_cond_and_v1(i32 %X) { ; CHECK-LABEL: @t1_select_cond_and_v1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 @@ -54,11 +48,9 @@ define i32 @t1_select_cond_and_v1_logical(i32 %X) { ; CHECK-LABEL: @t1_select_cond_and_v1_logical( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %dont_need_to_clamp_negative = icmp sge i32 %X, -32768 @@ -72,11 +64,9 @@ define i32 @t2_select_cond_or_v0(i32 %X) { ; CHECK-LABEL: @t2_select_cond_or_v0( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %need_to_clamp_negative = icmp slt i32 %X, -32768 @@ -88,11 +78,9 @@ define i32 @t2_select_cond_or_v0_logical(i32 %X) { ; CHECK-LABEL: @t2_select_cond_or_v0_logical( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %need_to_clamp_negative = icmp slt i32 %X, -32768 @@ -103,11 +91,9 @@ } define i32 @t3_select_cond_or_v1(i32 %X) { ; CHECK-LABEL: @t3_select_cond_or_v1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %need_to_clamp_negative = icmp slt i32 %X, -32768 @@ -119,11 +105,9 @@ define i32 @t3_select_cond_or_v1_logical(i32 %X) { ; CHECK-LABEL: @t3_select_cond_or_v1_logical( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %need_to_clamp_negative = icmp slt i32 %X, -32768 @@ -137,11 +121,9 @@ define i32 @t4_select_cond_xor_v0(i32 %X) { ; CHECK-LABEL: @t4_select_cond_xor_v0( -; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %dont_need_to_clamp_negative = icmp sgt i32 %X, -32768 @@ -152,11 +134,9 @@ } define i32 @t4_select_cond_xor_v1(i32 %X) { ; CHECK-LABEL: @t4_select_cond_xor_v1( -; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %need_to_clamp_positive = icmp sgt i32 %X, 32767 %dont_need_to_clamp_negative = icmp sgt i32 %X, -32768 @@ -168,11 +148,9 @@ define i32 @t5_select_cond_xor_v2(i32 %X) { ; CHECK-LABEL: @t5_select_cond_xor_v2( -; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %need_to_clamp_negative = icmp sle i32 %X, -32768 @@ -183,11 +161,9 @@ } define i32 @t5_select_cond_xor_v3(i32 %X) { ; CHECK-LABEL: @t5_select_cond_xor_v3( -; CHECK-NEXT: [[DOTINV:%.*]] = icmp sgt i32 [[X:%.*]], -32768 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i32 [[X]], i32 -32768 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 32767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 32767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 32767) +; CHECK-NEXT: ret i32 [[TMP2]] ; %dont_need_to_clamp_positive = icmp sle i32 %X, 32767 %need_to_clamp_negative = icmp sle i32 %X, -32768 diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll --- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll +++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -471,9 +471,8 @@ define float @ui32_clamp_and_cast_to_float(i32 %x) { ; CHECK-LABEL: @ui32_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], 255 -; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP2:%.*]] = uitofp i32 [[MIN1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = uitofp i32 [[TMP1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[TMP2]] ; CHECK-NEXT: ret float [[R]] ; @@ -488,9 +487,8 @@ define float @ui64_clamp_and_cast_to_float(i64 %x) { ; CHECK-LABEL: @ui64_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i64 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[X]], 255 -; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 255 -; CHECK-NEXT: [[TMP2:%.*]] = uitofp i64 [[MIN1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umin.i64(i64 [[X]], i64 255) +; CHECK-NEXT: [[TMP2:%.*]] = uitofp i64 [[TMP1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[TMP2]] ; CHECK-NEXT: ret float [[R]] ; @@ -504,11 +502,9 @@ define float @mixed_clamp_to_float_1(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1 -; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 1) +; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: ret float [[TMP3]] ; %si_min_cmp = icmp sgt i32 %x, 255 @@ -541,11 +537,9 @@ define float @mixed_clamp_to_float_2(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1 -; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 1) +; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: ret float [[TMP3]] ; %si_min_cmp = icmp sgt i32 %x, 255 diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -67,10 +67,9 @@ define i32 @t4(i32 %x, i32 %y) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], 5 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 5 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[X:%.*]], [[TMP2]] -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y:%.*]], i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X:%.*]], [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = shl i32 1, %y %2 = icmp ult i32 %1, 32 diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -979,10 +979,9 @@ define i32 @increment_max(i32 %x) { ; CHECK-LABEL: @increment_max( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -1 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], 1 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, 1 %c = icmp sgt i32 %a, 0 @@ -992,10 +991,9 @@ define i32 @decrement_max(i32 %x) { ; CHECK-LABEL: @decrement_max( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 1 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, -1 %c = icmp sgt i32 %a, 0 @@ -1005,10 +1003,9 @@ define i32 @increment_min(i32 %x) { ; CHECK-LABEL: @increment_min( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -1 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], 1 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, 1 %c = icmp slt i32 %a, 0 @@ -1018,10 +1015,9 @@ define i32 @decrement_min(i32 %x) { ; CHECK-LABEL: @decrement_min( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 1 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, -1 %c = icmp slt i32 %a, 0 diff --git a/llvm/test/Transforms/InstCombine/icmp-dom.ll b/llvm/test/Transforms/InstCombine/icmp-dom.ll --- a/llvm/test/Transforms/InstCombine/icmp-dom.ll +++ b/llvm/test/Transforms/InstCombine/icmp-dom.ll @@ -9,8 +9,8 @@ ; CHECK: land.lhs.true: ; CHECK-NEXT: br label [[LOR_END:%.*]] ; CHECK: lor.rhs: -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[A]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[LOR_END]], label [[LAND_RHS:%.*]] +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[A]], 0 +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[LOR_END]], label [[LAND_RHS:%.*]] ; CHECK: land.rhs: ; CHECK-NEXT: br label [[LOR_END]] ; CHECK: lor.end: @@ -67,9 +67,6 @@ ret void } -; TODO: cmp3 could be reduced to A != B, but we miss that -; while avoiding an infinite loop with min/max canonicalization. - define void @idom_sign_bit_check_edge_dominates_select(i64 %a, i64 %b) { ; CHECK-LABEL: @idom_sign_bit_check_edge_dominates_select( ; CHECK-NEXT: entry: @@ -78,9 +75,7 @@ ; CHECK: land.lhs.true: ; CHECK-NEXT: br label [[LOR_END:%.*]] ; CHECK: lor.rhs: -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[A]], 5 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP2]], i64 [[A]], i64 5 -; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i64 [[SELECT]], [[B:%.*]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i64 [[A]], [[B:%.*]] ; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[LOR_END]], label [[LAND_RHS:%.*]] ; CHECK: land.rhs: ; CHECK-NEXT: br label [[LOR_END]] @@ -135,18 +130,13 @@ ret void } -; TODO: cmp2 could be reduced to A != B, but we miss that -; while avoiding an infinite loop with min/max canonicalization. - define void @idom_not_zbranch(i32 %a, i32 %b) { ; CHECK-LABEL: @idom_not_zbranch( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A]], 0 -; CHECK-NEXT: [[A_:%.*]] = select i1 [[CMP1]], i32 [[A]], i32 0 -; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[A_]], [[B:%.*]] +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i32 [[A]], [[B:%.*]] ; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[RETURN]], label [[IF_THEN3:%.*]] ; CHECK: if.then3: ; CHECK-NEXT: br label [[RETURN]] @@ -363,14 +353,12 @@ define i32 @PR48900(i32 %i, i1* %p) { ; CHECK-LABEL: @PR48900( -; CHECK-NEXT: [[MAXCMP:%.*]] = icmp ugt i32 [[I:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[MAXCMP]], i32 [[I]], i32 1 -; CHECK-NEXT: [[I4:%.*]] = icmp sgt i32 [[UMAX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[I:%.*]], i32 1) +; CHECK-NEXT: [[I4:%.*]] = icmp sgt i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[I4]], label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]] ; CHECK: truelabel: -; CHECK-NEXT: [[MINCMP:%.*]] = icmp ult i32 [[UMAX]], 2 -; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[MINCMP]], i32 [[UMAX]], i32 2 -; CHECK-NEXT: ret i32 [[SMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 2) +; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK: falselabel: ; CHECK-NEXT: ret i32 0 ; @@ -393,14 +381,12 @@ define i8 @PR48900_alt(i8 %i, i1* %p) { ; CHECK-LABEL: @PR48900_alt( -; CHECK-NEXT: [[MAXCMP:%.*]] = icmp sgt i8 [[I:%.*]], -127 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[MAXCMP]], i8 [[I]], i8 -127 -; CHECK-NEXT: [[I4:%.*]] = icmp ugt i8 [[SMAX]], -128 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[I:%.*]], i8 -127) +; CHECK-NEXT: [[I4:%.*]] = icmp ugt i8 [[TMP1]], -128 ; CHECK-NEXT: br i1 [[I4]], label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]] ; CHECK: truelabel: -; CHECK-NEXT: [[MINCMP:%.*]] = icmp slt i8 [[SMAX]], -126 -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[MINCMP]], i8 [[SMAX]], i8 -126 -; CHECK-NEXT: ret i8 [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP1]], i8 -126) +; CHECK-NEXT: ret i8 [[TMP2]] ; CHECK: falselabel: ; CHECK-NEXT: ret i8 0 ; diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -675,9 +675,8 @@ define i32 @neg_max_s32(i32 %x, i32 %y) { ; CHECK-LABEL: @neg_max_s32( -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[S_NEG:%.*]] = select i1 [[C]], i32 [[Y]], i32 [[X]] -; CHECK-NEXT: ret i32 [[S_NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %nx = sub nsw i32 0, %x %ny = sub nsw i32 0, %y @@ -689,9 +688,8 @@ define <4 x i32> @neg_max_v4s32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @neg_max_v4s32( -; CHECK-NEXT: [[C:%.*]] = icmp sgt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[S_NEG:%.*]] = select <4 x i1> [[C]], <4 x i32> [[X]], <4 x i32> [[Y]] -; CHECK-NEXT: ret <4 x i32> [[S_NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %nx = sub nsw <4 x i32> zeroinitializer, %x %ny = sub nsw <4 x i32> zeroinitializer, %y diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll --- a/llvm/test/Transforms/InstCombine/max-of-nots.ll +++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll @@ -3,10 +3,9 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umin_of_nots( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], -; CHECK-NEXT: ret <2 x i32> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %notx = xor <2 x i32> %x, %noty = xor <2 x i32> %y, @@ -17,10 +16,9 @@ define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smin_of_nots( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], -; CHECK-NEXT: ret <2 x i32> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %notx = xor <2 x i32> %x, %noty = xor <2 x i32> %y, @@ -31,9 +29,8 @@ define i32 @compute_min_2(i32 %x, i32 %y) { ; CHECK-LABEL: @compute_min_2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %not_x = sub i32 -1, %x %not_y = sub i32 -1, %y @@ -47,11 +44,10 @@ define i8 @umin_not_1_extra_use(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_not_1_extra_use( ; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[Y:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[Y]], i8 [[X]] -; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1 ; CHECK-NEXT: call void @extra_use(i8 [[NX]]) -; CHECK-NEXT: ret i8 [[MINXY]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 @@ -65,11 +61,10 @@ ; CHECK-LABEL: @umin_not_2_extra_use( ; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1 ; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[CMPXY:%.*]] = icmp ult i8 [[NX]], [[NY]] -; CHECK-NEXT: [[MINXY:%.*]] = select i1 [[CMPXY]], i8 [[NX]], i8 [[NY]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[NX]], i8 [[NY]]) ; CHECK-NEXT: call void @extra_use(i8 [[NX]]) ; CHECK-NEXT: call void @extra_use(i8 [[NY]]) -; CHECK-NEXT: ret i8 [[MINXY]] +; CHECK-NEXT: ret i8 [[TMP1]] ; %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 @@ -84,10 +79,8 @@ define i8 @umin3_not(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @umin3_not( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]] -; CHECK-NEXT: [[R_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[R_V:%.*]] = call i8 @llvm.umax.i8(i8 [[TMP1]], i8 [[Z:%.*]]) ; CHECK-NEXT: [[R:%.*]] = xor i8 [[R_V]], -1 ; CHECK-NEXT: ret i8 [[R]] ; @@ -109,11 +102,9 @@ ; CHECK-LABEL: @umin3_not_more_uses( ; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1 ; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[Z:%.*]], [[X]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[Z]], i8 [[X]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]] -; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[Z:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], -1 ; CHECK-NEXT: call void @extra_use(i8 [[NX]]) ; CHECK-NEXT: call void @extra_use(i8 [[NY]]) ; CHECK-NEXT: ret i8 [[R]] @@ -139,14 +130,12 @@ ; CHECK-NEXT: [[XN:%.*]] = xor i8 [[X:%.*]], -1 ; CHECK-NEXT: [[YN:%.*]] = xor i8 [[Y:%.*]], -1 ; CHECK-NEXT: [[ZN:%.*]] = xor i8 [[Z:%.*]], -1 -; CHECK-NEXT: [[CMPXZ:%.*]] = icmp ult i8 [[XN]], [[ZN]] -; CHECK-NEXT: [[MINXZ:%.*]] = select i1 [[CMPXZ]], i8 [[XN]], i8 [[ZN]] -; CHECK-NEXT: [[CMPXYZ:%.*]] = icmp ult i8 [[MINXZ]], [[YN]] -; CHECK-NEXT: [[MINXYZ:%.*]] = select i1 [[CMPXYZ]], i8 [[MINXZ]], i8 [[YN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[XN]], i8 [[ZN]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umin.i8(i8 [[TMP1]], i8 [[YN]]) ; CHECK-NEXT: call void @use8(i8 [[XN]]) ; CHECK-NEXT: call void @use8(i8 [[YN]]) ; CHECK-NEXT: call void @use8(i8 [[ZN]]) -; CHECK-NEXT: ret i8 [[MINXYZ]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %xn = xor i8 %x, -1 %yn = xor i8 %y, -1 @@ -163,11 +152,9 @@ define i32 @compute_min_3(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @compute_min_3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[Z:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[Z]] -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[Z:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_x = sub i32 -1, %x %not_y = sub i32 -1, %y @@ -184,11 +171,10 @@ define i32 @compute_min_arithmetic(i32 %x, i32 %y) { ; CHECK-LABEL: @compute_min_arithmetic( -; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]] -; CHECK-NEXT: [[NOT_Y:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[NOT_VALUE]], [[NOT_Y]] -; CHECK-NEXT: [[NOT_MIN:%.*]] = select i1 [[CMP]], i32 [[NOT_VALUE]], i32 [[NOT_Y]] -; CHECK-NEXT: ret i32 [[NOT_MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[Y:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[TMP3]] ; %not_value = sub i32 3, %x %not_y = sub i32 -1, %y @@ -204,9 +190,8 @@ ; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]] ; CHECK-NEXT: call void @fake_use(i32 [[NOT_VALUE]]) ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[Y:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[Y:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_value = sub i32 3, %x call void @fake_use(i32 %not_value) @@ -219,12 +204,10 @@ define i32 @max_of_nots(i32 %x, i32 %y) { ; CHECK-LABEL: @max_of_nots( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[X:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[X]] -; CHECK-NEXT: [[SMAX96:%.*]] = xor i32 [[TMP4]], -1 -; CHECK-NEXT: ret i32 [[SMAX96]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[X:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[TMP3]] ; %c0 = icmp sgt i32 %y, 0 %xor_y = xor i32 %y, -1 @@ -235,15 +218,14 @@ ret i32 %smax96 } - ; negative test case (i.e. can not simplify) : ABS(MIN(NOT x,y)) + ; negative test case (i.e. can not simplify) : ABS(MIN(NOT x,y)) define i32 @abs_of_min_of_not(i32 %x, i32 %y) { ; CHECK-LABEL: @abs_of_min_of_not( -; CHECK-NEXT: [[XORD:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[YADD:%.*]] = add i32 [[Y:%.*]], 2 -; CHECK-NEXT: [[COND_I_NOT:%.*]] = icmp slt i32 [[YADD]], [[XORD]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[COND_I_NOT]], i32 [[YADD]], i32 [[XORD]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[MIN]], i1 false) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -3, [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.abs.i32(i32 [[TMP3]], i1 false) +; CHECK-NEXT: ret i32 [[TMP4]] ; %xord = xor i32 %x, -1 @@ -258,12 +240,10 @@ define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @max_of_nots_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[Y:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[Y]], <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], [[X:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP2]], <2 x i32> [[X]] -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i32> [[TMP4]], -; CHECK-NEXT: ret <2 x i32> [[SMAX96]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[X:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %c0 = icmp sgt <2 x i32> %y, zeroinitializer %xor_y = xor <2 x i32> %y, @@ -276,12 +256,10 @@ define <2 x i37> @max_of_nots_weird_type_vec(<2 x i37> %x, <2 x i37> %y) { ; CHECK-LABEL: @max_of_nots_weird_type_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i37> [[Y:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i37> [[Y]], <2 x i37> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i37> [[TMP2]], [[X:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x i37> [[TMP2]], <2 x i37> [[X]] -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i37> [[TMP4]], -; CHECK-NEXT: ret <2 x i37> [[SMAX96]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i37> @llvm.smax.v2i37(<2 x i37> [[Y:%.*]], <2 x i37> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i37> @llvm.smin.v2i37(<2 x i37> [[TMP1]], <2 x i37> [[X:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i37> [[TMP2]], +; CHECK-NEXT: ret <2 x i37> [[TMP3]] ; %c0 = icmp sgt <2 x i37> %y, zeroinitializer %xor_y = xor <2 x i37> %y, @@ -360,15 +338,13 @@ define void @cmyk(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP4]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP4]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP4]], [[B]] -; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[TMP5]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] +; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; %notr = xor i8 %r, -1 @@ -389,14 +365,12 @@ define void @cmyk2(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[K_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[K:%.*]] = xor i8 [[K_V]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K_V]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K_V]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K_V]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] ; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; @@ -418,14 +392,12 @@ define void @cmyk3(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[K_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[K:%.*]] = xor i8 [[K_V]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K_V]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K_V]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K_V]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] ; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; @@ -447,14 +419,12 @@ define void @cmyk4(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[K_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[K:%.*]] = xor i8 [[K_V]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K_V]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K_V]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K_V]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] ; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; @@ -476,14 +446,12 @@ define void @cmyk5(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk5( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[K_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[K:%.*]] = xor i8 [[K_V]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K_V]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K_V]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K_V]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] ; CHECK-NEXT: call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; @@ -505,14 +473,12 @@ define void @cmyk6(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk6( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[G:%.*]] -; CHECK-NEXT: [[K_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[G]] -; CHECK-NEXT: [[K:%.*]] = xor i8 [[K_V]], -1 -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[K_V]], [[R]] -; CHECK-NEXT: [[MK:%.*]] = sub i8 [[K_V]], [[G]] -; CHECK-NEXT: [[YK:%.*]] = sub i8 [[K_V]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[B:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[K:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] +; CHECK-NEXT: [[MK:%.*]] = sub i8 [[TMP2]], [[G]] +; CHECK-NEXT: [[YK:%.*]] = sub i8 [[TMP2]], [[B]] ; CHECK-NEXT: tail call void @use(i8 [[CK]], i8 [[MK]], i8 [[YK]], i8 [[K]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/max_known_bits.ll b/llvm/test/Transforms/InstCombine/max_known_bits.ll --- a/llvm/test/Transforms/InstCombine/max_known_bits.ll +++ b/llvm/test/Transforms/InstCombine/max_known_bits.ll @@ -19,11 +19,9 @@ ; By analyzing the clamp pattern, we can tell the add doesn't have signed overflow. define i16 @min_max_clamp(i16 %x) { ; CHECK-LABEL: @min_max_clamp( -; CHECK-NEXT: [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048 -; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048 -; CHECK-NEXT: [[C:%.*]] = icmp slt i16 [[B]], 2047 -; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047 -; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[X:%.*]], i16 -2048) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 2047) +; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[TMP2]], 1 ; CHECK-NEXT: ret i16 [[E]] ; %a = icmp sgt i16 %x, -2048 @@ -37,11 +35,9 @@ ; Same as above with min/max reversed. define i16 @min_max_clamp_2(i16 %x) { ; CHECK-LABEL: @min_max_clamp_2( -; CHECK-NEXT: [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047 -; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i16 [[B]], -2048 -; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048 -; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -2048) +; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[TMP2]], 1 ; CHECK-NEXT: ret i16 [[E]] ; %a = icmp slt i16 %x, 2047 @@ -57,12 +53,10 @@ ; overflow the original type and can be moved before the extend. define i32 @min_max_clamp_3(i16 %x) { ; CHECK-LABEL: @min_max_clamp_3( -; CHECK-NEXT: [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048 -; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048 -; CHECK-NEXT: [[C:%.*]] = icmp slt i16 [[B]], 2047 -; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[D]] to i32 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[X:%.*]], i16 -2048) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 2047) +; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] ; %a = icmp sgt i16 %x, -2048 %b = select i1 %a, i16 %x, i16 -2048 @@ -77,12 +71,10 @@ ; Same as above with min/max order reversed define i32 @min_max_clamp_4(i16 %x) { ; CHECK-LABEL: @min_max_clamp_4( -; CHECK-NEXT: [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047 -; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i16 [[B]], -2048 -; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048 -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[D]] to i32 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -2048) +; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] ; %a = icmp slt i16 %x, 2047 %b = select i1 %a, i16 %x, i16 2047 diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll --- a/llvm/test/Transforms/InstCombine/min-positive.ll +++ b/llvm/test/Transforms/InstCombine/min-positive.ll @@ -84,9 +84,8 @@ define i1 @maybe_not_positive(i32 %other) { ; CHECK-LABEL: @maybe_not_positive( ; CHECK-NEXT: [[POSITIVE:%.*]] = load i32, i32* @g, align 4, !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[POSITIVE]], [[OTHER:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[POSITIVE]], i32 [[OTHER]] -; CHECK-NEXT: [[TEST:%.*]] = icmp sgt i32 [[SEL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[POSITIVE]], i32 [[OTHER:%.*]]) +; CHECK-NEXT: [[TEST:%.*]] = icmp sgt i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[TEST]] ; %positive = load i32, i32* @g, !range !{i32 0, i32 2048} @@ -99,9 +98,8 @@ define <2 x i1> @maybe_not_positive_vec(<2 x i32> %x, <2 x i32> %other) { ; CHECK-LABEL: @maybe_not_positive_vec( ; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[NOTNEG]], [[OTHER:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NOTNEG]], <2 x i32> [[OTHER]] -; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[SEL]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[NOTNEG]], <2 x i32> [[OTHER:%.*]]) +; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TEST]] ; %notneg = and <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/minmax-demandbits.ll b/llvm/test/Transforms/InstCombine/minmax-demandbits.ll --- a/llvm/test/Transforms/InstCombine/minmax-demandbits.ll +++ b/llvm/test/Transforms/InstCombine/minmax-demandbits.ll @@ -26,9 +26,8 @@ define i32 @and_umax_more(i32 %A) { ; CHECK-LABEL: @and_umax_more( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 32 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 32 -; CHECK-NEXT: [[X:%.*]] = and i32 [[L1]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 32) +; CHECK-NEXT: [[X:%.*]] = and i32 [[TMP1]], -32 ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp ugt i32 32, %A @@ -118,9 +117,8 @@ define i8 @f_1_1(i8 %A) { ; CHECK-LABEL: @f_1_1( -; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 1 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 1 -; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[A:%.*]], i8 1) +; CHECK-NEXT: [[X:%.*]] = and i8 [[TMP1]], 1 ; CHECK-NEXT: ret i8 [[X]] ; %l2 = icmp ugt i8 %A, 1 @@ -131,9 +129,8 @@ define i8 @f_32_32(i8 %A) { ; CHECK-LABEL: @f_32_32( -; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 32 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 32 -; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[A:%.*]], i8 32) +; CHECK-NEXT: [[X:%.*]] = and i8 [[TMP1]], -32 ; CHECK-NEXT: ret i8 [[X]] ; %l2 = icmp ugt i8 %A, 32 @@ -144,9 +141,8 @@ define i8 @f_191_192(i8 %A) { ; CHECK-LABEL: @f_191_192( -; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], -65 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 -65 -; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], -64 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[A:%.*]], i8 -65) +; CHECK-NEXT: [[X:%.*]] = and i8 [[TMP1]], -64 ; CHECK-NEXT: ret i8 [[X]] ; %l2 = icmp ugt i8 %A, 191 @@ -157,9 +153,8 @@ define i8 @f_10_1(i8 %A) { ; CHECK-LABEL: @f_10_1( -; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 10 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 10 -; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[A:%.*]], i8 10) +; CHECK-NEXT: [[X:%.*]] = and i8 [[TMP1]], 1 ; CHECK-NEXT: ret i8 [[X]] ; %l2 = icmp ugt i8 %A, 10 @@ -223,9 +218,8 @@ define i8 @and_min_7_9(i8 %A) { ; CHECK-LABEL: @and_min_7_9( -; CHECK-NEXT: [[L2:%.*]] = icmp ult i8 [[A:%.*]], -9 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[L2]], i8 [[A]], i8 -9 -; CHECK-NEXT: [[R:%.*]] = and i8 [[MIN]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 -9) +; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP1]], -8 ; CHECK-NEXT: ret i8 [[R]] ; %l2 = icmp ult i8 %A, -9 diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -4,10 +4,9 @@ ; This is the canonical form for a type-changing min/max. define i64 @t1(i32 %a) { ; CHECK-LABEL: @t1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 5 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5 -; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %1 = icmp slt i32 %a, 5 %2 = select i1 %1, i32 %a, i32 5 @@ -18,9 +17,8 @@ ; Check this is converted into canonical form, as above. define i64 @t2(i32 %a) { ; CHECK-LABEL: @t2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 5 -; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[TMP2]] ; %1 = icmp slt i32 %a, 5 @@ -32,9 +30,8 @@ ; Same as @t2, with flipped operands and zext instead of sext. define i64 @t3(i32 %a) { ; CHECK-LABEL: @t3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 5 -; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[TMP2]] ; %1 = icmp ult i32 %a, 5 @@ -46,10 +43,9 @@ ; Same again, with trunc. define i32 @t4(i64 %a) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A:%.*]], 5 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 5 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smin.i64(i64 [[A:%.*]], i64 5) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = icmp slt i64 %a, 5 %2 = trunc i64 %a to i32 @@ -60,9 +56,8 @@ ; Same as @t3, but with mismatched signedness between icmp and zext. define i64 @t5(i32 %a) { ; CHECK-LABEL: @t5( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], 5 -; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[NARROW]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[TMP2]] ; %1 = icmp slt i32 %a, 5 @@ -73,10 +68,9 @@ define float @t6(i32 %a) { ; CHECK-LABEL: @t6( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] ; %1 = icmp slt i32 %a, 0 %2 = select i1 %1, i32 %a, i32 0 @@ -86,10 +80,9 @@ define i16 @t7(i32 %a) { ; CHECK-LABEL: @t7( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32768 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 -; CHECK-NEXT: ret i16 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] ; %1 = icmp slt i32 %a, -32768 %2 = trunc i32 %a to i16 @@ -103,14 +96,13 @@ ; parts of instcombine. define i32 @t8(i64 %a, i32 %b) { ; CHECK-LABEL: @t8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A:%.*]], -32767 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 -32767 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[B:%.*]], 42 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 42, i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], [[B]] -; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smin.i64(i64 [[A:%.*]], i64 -32767) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[B:%.*]], 42 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 42, i32 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], [[B]] +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: ret i32 [[TMP6]] ; %1 = icmp slt i64 %a, -32767 %2 = select i1 %1, i64 %a, i64 -32767 @@ -138,9 +130,8 @@ define float @t10(i32 %x) { ; CHECK-LABEL: @t10( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[TMP2]] ; %f_x = sitofp i32 %x to float @@ -151,9 +142,8 @@ define float @t11(i64 %x) { ; CHECK-LABEL: @t11( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], 255 -; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 255 -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i64 [[R1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[X:%.*]], i64 255) +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i64 [[TMP1]] to float ; CHECK-NEXT: ret float [[TMP2]] ; %f_x = sitofp i64 %x to float @@ -208,8 +198,7 @@ ; CHECK-LABEL: @bitcasts_icmp( ; CHECK-NEXT: [[T0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32> -; CHECK-NEXT: [[T2:%.*]] = icmp slt <4 x i32> [[T1]], [[T0]] -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[T2]], <4 x i32> [[T0]], <4 x i32> [[T1]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[T1]], <4 x i32> [[T0]]) ; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[T5]] ; @@ -225,9 +214,8 @@ ; SMIN(SMIN(X, 11), 92) -> SMIN(X, 11) define i32 @test68(i32 %x) { ; CHECK-LABEL: @test68( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 11 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 11 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 11) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp slt i32 11, %x %cond = select i1 %cmp, i32 11, i32 %x @@ -238,9 +226,8 @@ define <2 x i32> @test68vec(<2 x i32> %x) { ; CHECK-LABEL: @test68vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp = icmp slt <2 x i32> , %x %cond = select <2 x i1> %cmp, <2 x i32> , <2 x i32> %x @@ -252,9 +239,8 @@ ; MIN(MIN(X, 24), 83) -> MIN(X, 24) define i32 @test69(i32 %x) { ; CHECK-LABEL: @test69( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 24 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 24 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 24) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ult i32 24, %x %cond = select i1 %cmp, i32 24, i32 %x @@ -266,9 +252,8 @@ ; SMAX(SMAX(X, 75), 36) -> SMAX(X, 75) define i32 @test70(i32 %x) { ; CHECK-LABEL: @test70( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp slt i32 %x, 75 %cond = select i1 %cmp, i32 75, i32 %x @@ -280,9 +265,8 @@ ; MAX(MAX(X, 68), 47) -> MAX(X, 68) define i32 @test71(i32 %x) { ; CHECK-LABEL: @test71( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 68 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 68 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 68) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ult i32 %x, 68 %cond = select i1 %cmp, i32 68, i32 %x @@ -294,9 +278,8 @@ ; SMIN(SMIN(X, 92), 11) -> SMIN(X, 11) define i32 @test72(i32 %x) { ; CHECK-LABEL: @test72( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 11 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 11 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 11) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, 92 %cond = select i1 %cmp, i32 92, i32 %x @@ -307,9 +290,8 @@ define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp = icmp sgt <2 x i32> %x, %cond = select <2 x i1> %cmp, <2 x i32> , <2 x i32> %x @@ -321,9 +303,8 @@ ; MIN(MIN(X, 83), 24) -> MIN(X, 24) define i32 @test73(i32 %x) { ; CHECK-LABEL: @test73( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 24 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 24 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 24) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ugt i32 %x, 83 %cond = select i1 %cmp, i32 83, i32 %x @@ -335,9 +316,9 @@ ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75) define i32 @test74(i32 %x) { ; CHECK-LABEL: @test74( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 75) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp = icmp slt i32 %x, 36 %cond = select i1 %cmp, i32 36, i32 %x @@ -349,9 +330,8 @@ ; MAX(MAX(X, 47), 68) -> MAX(X, 68) define i32 @test75(i32 %x) { ; CHECK-LABEL: @test75( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 68 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 68 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 68) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ult i32 %x, 47 %cond = select i1 %cmp, i32 47, i32 %x @@ -367,11 +347,9 @@ define i32 @clamp_signed1(i32 %x) { ; CHECK-LABEL: @clamp_signed1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 15) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp slt i32 %x, 255 %min = select i1 %cmp2, i32 %x, i32 255 @@ -384,11 +362,9 @@ define i32 @clamp_signed2(i32 %x) { ; CHECK-LABEL: @clamp_signed2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], 15 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 15) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 255) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp sgt i32 %x, 15 %max = select i1 %cmp2, i32 %x, i32 15 @@ -401,11 +377,9 @@ define i32 @clamp_signed3(i32 %x) { ; CHECK-LABEL: @clamp_signed3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 15) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp slt i32 %x, 255 %min = select i1 %cmp2, i32 %x, i32 255 @@ -418,11 +392,9 @@ define i32 @clamp_signed4(i32 %x) { ; CHECK-LABEL: @clamp_signed4( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], 15 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 15) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 255) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp sgt i32 %x, 15 %max = select i1 %cmp2, i32 %x, i32 15 @@ -435,11 +407,9 @@ define i32 @clamp_unsigned1(i32 %x) { ; CHECK-LABEL: @clamp_unsigned1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], 255 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 15) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp ult i32 %x, 255 %min = select i1 %cmp2, i32 %x, i32 255 @@ -452,11 +422,9 @@ define i32 @clamp_unsigned2(i32 %x) { ; CHECK-LABEL: @clamp_unsigned2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], 15 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 15) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 255) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp ugt i32 %x, 15 %max = select i1 %cmp2, i32 %x, i32 15 @@ -469,11 +437,9 @@ define i32 @clamp_unsigned3(i32 %x) { ; CHECK-LABEL: @clamp_unsigned3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], 255 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 15) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp ult i32 %x, 255 %min = select i1 %cmp2, i32 %x, i32 255 @@ -486,11 +452,9 @@ define i32 @clamp_unsigned4(i32 %x) { ; CHECK-LABEL: @clamp_unsigned4( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], 15 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 15) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 255) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp2 = icmp ugt i32 %x, 15 %max = select i1 %cmp2, i32 %x, i32 15 @@ -504,11 +468,9 @@ ; (icmp sgt smin(PositiveA, B) 0) -> (icmp sgt B 0) define i32 @clamp_check_for_no_infinite_loop1(i32 %i) { ; CHECK-LABEL: @clamp_check_for_no_infinite_loop1( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I:%.*]], 255 -; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SEL1]], 0 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[I:%.*]], i32 255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp1 = icmp slt i32 %i, 255 %sel1 = select i1 %cmp1, i32 %i, i32 255 @@ -520,11 +482,9 @@ ; (icmp slt smax(NegativeA, B) 0) -> (icmp slt B 0) define i32 @clamp_check_for_no_infinite_loop2(i32 %i) { ; CHECK-LABEL: @clamp_check_for_no_infinite_loop2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[I:%.*]], -255 -; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 -255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SEL1]], 0 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[I:%.*]], i32 -255) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 0) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp1 = icmp sgt i32 %i, -255 %sel1 = select i1 %cmp1, i32 %i, i32 -255 @@ -537,13 +497,11 @@ ; (icmp slt smax(PositiveA, B) 2) -> (icmp eq B 1) define i32 @clamp_check_for_no_infinite_loop3(i32 %i) { ; CHECK-LABEL: @clamp_check_for_no_infinite_loop3( -; CHECK-NEXT: [[I2:%.*]] = icmp sgt i32 [[I:%.*]], 1 -; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i32 [[I]], i32 1 ; CHECK-NEXT: br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]] ; CHECK: truelabel: -; CHECK-NEXT: [[I5:%.*]] = icmp slt i32 [[I3]], 2 -; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], i32 [[I3]], i32 2 -; CHECK-NEXT: [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[I:%.*]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 2) +; CHECK-NEXT: [[I7:%.*]] = shl nuw nsw i32 [[TMP2]], 2 ; CHECK-NEXT: ret i32 [[I7]] ; CHECK: falselabel: ; CHECK-NEXT: ret i32 0 @@ -568,9 +526,8 @@ define double @PR31751_umin1(i32 %x) { ; CHECK-LABEL: @PR31751_umin1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp slt i32 %x, 0 @@ -581,9 +538,8 @@ define double @PR31751_umin2(i32 %x) { ; CHECK-LABEL: @PR31751_umin2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X]], i32 2147483647 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ult i32 %x, 2147483647 @@ -594,9 +550,8 @@ define double @PR31751_umin3(i32 %x) { ; CHECK-LABEL: @PR31751_umin3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ugt i32 %x, 2147483647 @@ -609,9 +564,8 @@ define double @PR31751_umax1(i32 %x) { ; CHECK-LABEL: @PR31751_umax1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 -2147483648) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp sgt i32 %x, -1 @@ -622,9 +576,8 @@ define double @PR31751_umax2(i32 %x) { ; CHECK-LABEL: @PR31751_umax2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -2147483648 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 -2147483648) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ugt i32 %x, 2147483648 @@ -635,9 +588,8 @@ define double @PR31751_umax3(i32 %x) { ; CHECK-LABEL: @PR31751_umax3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648 -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 -2147483648) +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ult i32 %x, 2147483648 @@ -652,9 +604,8 @@ ; CHECK-LABEL: @bitcast_scalar_smax( ; CHECK-NEXT: [[BCX:%.*]] = bitcast float [[X:%.*]] to i32 ; CHECK-NEXT: [[BCY:%.*]] = bitcast float [[Y:%.*]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[BCX]], [[BCY]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[BCX]], i32 [[BCY]] -; CHECK-NEXT: [[BCS:%.*]] = bitcast i32 [[SEL]] to float +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[BCX]], i32 [[BCY]]) +; CHECK-NEXT: [[BCS:%.*]] = bitcast i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[BCS]] ; %bcx = bitcast float %x to i32 @@ -689,9 +640,8 @@ ; CHECK-LABEL: @bitcast_vector_smin( ; CHECK-NEXT: [[BCX:%.*]] = bitcast <8 x float> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[BCY:%.*]] = bitcast <8 x float> [[Y:%.*]] to <8 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[BCX]], [[BCY]] -; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[BCX]], <8 x i32> [[BCY]] -; CHECK-NEXT: [[BCS:%.*]] = bitcast <8 x i32> [[SEL]] to <8 x float> +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[BCX]], <8 x i32> [[BCY]]) +; CHECK-NEXT: [[BCS:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[BCS]] ; %bcx = bitcast <8 x float> %x to <8 x i32> @@ -723,9 +673,8 @@ define zeroext i8 @look_through_cast1(i32 %x) { ; CHECK-LABEL: @look_through_cast1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 511 -; CHECK-NEXT: [[RES1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 511 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[RES1]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 511) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[TMP2]] ; %cmp1 = icmp slt i32 %x, 511 @@ -751,9 +700,8 @@ define <2 x i8> @min_through_cast_vec1(<2 x i32> %x) { ; CHECK-LABEL: @min_through_cast_vec1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RES1:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %cmp = icmp slt <2 x i32> %x, @@ -764,9 +712,8 @@ define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) { ; CHECK-LABEL: @min_through_cast_vec2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RES1:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %cmp = icmp slt <2 x i32> %x, @@ -782,11 +729,9 @@ define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @common_factor_smin( -; CHECK-NEXT: [[CMP_AB:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 [[A]], i32 [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MIN_AB]], [[C:%.*]] -; CHECK-NEXT: [[MIN_ABC:%.*]] = select i1 [[TMP1]], i32 [[MIN_AB]], i32 [[C]] -; CHECK-NEXT: ret i32 [[MIN_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[B:%.*]], i32 [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[A:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp_ab = icmp slt i32 %a, %b %min_ab = select i1 %cmp_ab, i32 %a, i32 %b @@ -801,11 +746,9 @@ define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: @common_factor_smax( -; CHECK-NEXT: [[CMP_AB:%.*]] = icmp sgt <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> [[A]], <2 x i32> [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[C:%.*]] -; CHECK-NEXT: [[MAX_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MAX_AB]], <2 x i32> [[C]] -; CHECK-NEXT: ret <2 x i32> [[MAX_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[C:%.*]], <2 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[A:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %cmp_ab = icmp sgt <2 x i32> %a, %b %max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b @@ -820,11 +763,9 @@ define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: @common_factor_umin( -; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ult <2 x i32> [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> [[B]], <2 x i32> [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[A:%.*]] -; CHECK-NEXT: [[MIN_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MIN_BC]], <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[MIN_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[C:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %cmp_bc = icmp ult <2 x i32> %b, %c %min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c @@ -839,11 +780,9 @@ define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @common_factor_umax( -; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], [[A:%.*]] -; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 [[A]] -; CHECK-NEXT: ret i32 [[MAX_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[B:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[C:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp_bc = icmp ugt i32 %b, %c %max_bc = select i1 %cmp_bc, i32 %b, i32 %c @@ -858,12 +797,10 @@ define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @common_factor_umax_extra_use_lhs( -; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], [[A:%.*]] -; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 [[A]] -; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) -; CHECK-NEXT: ret i32 [[MAX_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[B:%.*]], i32 [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[A:%.*]]) +; CHECK-NEXT: call void @extra_use(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp_bc = icmp ugt i32 %b, %c %max_bc = select i1 %cmp_bc, i32 %b, i32 %c @@ -877,12 +814,10 @@ define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @common_factor_umax_extra_use_rhs( -; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 [[B]], i32 [[A]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BA]], [[C:%.*]] -; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BA]], i32 [[C]] -; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) -; CHECK-NEXT: ret i32 [[MAX_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[B:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[C:%.*]]) +; CHECK-NEXT: call void @extra_use(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp_bc = icmp ugt i32 %b, %c %max_bc = select i1 %cmp_bc, i32 %b, i32 %c @@ -896,15 +831,12 @@ define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: @common_factor_umax_extra_use_both( -; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]] -; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]] -; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 [[B]], [[A:%.*]] -; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 [[B]], i32 [[A]] -; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] -; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] -; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) -; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) -; CHECK-NEXT: ret i32 [[MAX_ABC]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[B:%.*]], i32 [[C:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[B]], i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @extra_use(i32 [[TMP1]]) +; CHECK-NEXT: call void @extra_use(i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; %cmp_bc = icmp ugt i32 %b, %c %max_bc = select i1 %cmp_bc, i32 %b, i32 %c @@ -940,10 +872,9 @@ define i32 @add_umin(i32 %x) { ; CHECK-LABEL: @add_umin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 27 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27 -; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[TMP2]], 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 27) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 15 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nuw i32 %x, 15 %c = icmp ult i32 %a, 42 @@ -954,8 +885,8 @@ define i32 @add_umin_constant_limit(i32 %x) { ; CHECK-LABEL: @add_umin_constant_limit( ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[R:%.*]] = select i1 [[DOTNOT]], i32 41, i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 41, i32 42 +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 41 %c = icmp ult i32 %a, 42 @@ -994,9 +925,8 @@ define i32 @add_umin_wrong_pred(i32 %x) { ; CHECK-LABEL: @add_umin_wrong_pred( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 %c = icmp slt i32 %a, 42 @@ -1009,9 +939,8 @@ define i32 @add_umin_wrong_wrap(i32 %x) { ; CHECK-LABEL: @add_umin_wrong_wrap( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 %c = icmp ult i32 %a, 42 @@ -1025,9 +954,8 @@ ; CHECK-LABEL: @add_umin_extra_use( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 ; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 store i32 %a, i32* %p @@ -1038,10 +966,9 @@ define <2 x i16> @add_umin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_umin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP2]], -; CHECK-NEXT: ret <2 x i16> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: ret <2 x i16> [[TMP2]] ; %a = add nuw <2 x i16> %x, %c = icmp ult <2 x i16> %a, @@ -1051,10 +978,9 @@ define i37 @add_umax(i37 %x) { ; CHECK-LABEL: @add_umax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 37 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37 -; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 5 -; CHECK-NEXT: ret i37 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i37 @llvm.umax.i37(i37 [[X:%.*]], i37 37) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i37 [[TMP1]], 5 +; CHECK-NEXT: ret i37 [[TMP2]] ; %a = add nuw i37 %x, 5 %c = icmp ugt i37 %a, 42 @@ -1064,10 +990,9 @@ define i37 @add_umax_constant_limit(i37 %x) { ; CHECK-LABEL: @add_umax_constant_limit( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 1 -; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 81 -; CHECK-NEXT: ret i37 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i37 @llvm.umax.i37(i37 [[X:%.*]], i37 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i37 [[TMP1]], 81 +; CHECK-NEXT: ret i37 [[TMP2]] ; %a = add nuw i37 %x, 81 %c = icmp ugt i37 %a, 82 @@ -1080,8 +1005,8 @@ define i37 @add_umax_simplify(i37 %x) { ; CHECK-LABEL: @add_umax_simplify( -; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[X:%.*]], 42 -; CHECK-NEXT: ret i37 [[R]] +; CHECK-NEXT: [[A:%.*]] = add nuw i37 [[X:%.*]], 42 +; CHECK-NEXT: ret i37 [[A]] ; %a = add nuw i37 %x, 42 %c = icmp ugt i37 %a, 42 @@ -1108,9 +1033,8 @@ define i32 @add_umax_wrong_pred(i32 %x) { ; CHECK-LABEL: @add_umax_wrong_pred( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 %c = icmp sgt i32 %a, 42 @@ -1127,9 +1051,8 @@ define i32 @add_umax_wrong_wrap(i32 %x) { ; CHECK-LABEL: @add_umax_wrong_wrap( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 %c = icmp ugt i32 %a, 42 @@ -1143,9 +1066,8 @@ ; CHECK-LABEL: @add_umax_extra_use( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 ; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 store i32 %a, i32* %p @@ -1156,10 +1078,9 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_umax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i33> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> -; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP2]], -; CHECK-NEXT: ret <2 x i33> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.umax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw <2 x i33> [[TMP1]], +; CHECK-NEXT: ret <2 x i33> [[TMP2]] ; %a = add nuw <2 x i33> %x, %c = icmp ugt <2 x i33> %a, @@ -1169,8 +1090,8 @@ define i8 @PR14613_umin(i8 %x) { ; CHECK-LABEL: @PR14613_umin( -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15) -; CHECK-NEXT: ret i8 [[TMP1]] +; CHECK-NEXT: [[NARROW:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15) +; CHECK-NEXT: ret i8 [[NARROW]] ; %u4 = zext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1182,10 +1103,9 @@ define i8 @PR14613_umax(i8 %x) { ; CHECK-LABEL: @PR14613_umax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -16 -; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 -; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 -1 -; CHECK-NEXT: ret i8 [[U7]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 -16) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[TMP1]], 15 +; CHECK-NEXT: ret i8 [[TMP2]] ; %u4 = zext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1197,10 +1117,9 @@ define i32 @add_smin(i32 %x) { ; CHECK-LABEL: @add_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 27 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27 -; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP2]], 15 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 27) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], 15 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, 15 %c = icmp slt i32 %a, 42 @@ -1210,10 +1129,9 @@ define i32 @add_smin_constant_limit(i32 %x) { ; CHECK-LABEL: @add_smin_constant_limit( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 2147483646 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483646 -; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP2]], -3 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 2147483646) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -3 +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add nsw i32 %x, -3 %c = icmp slt i32 %a, 2147483643 @@ -1226,8 +1144,8 @@ define i32 @add_smin_simplify(i32 %x) { ; CHECK-LABEL: @add_smin_simplify( -; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[X:%.*]], -3 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], -3 +; CHECK-NEXT: ret i32 [[A]] ; %a = add nsw i32 %x, -3 %c = icmp slt i32 %a, 2147483644 @@ -1254,9 +1172,8 @@ define i32 @add_smin_wrong_pred(i32 %x) { ; CHECK-LABEL: @add_smin_wrong_pred( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 %c = icmp ult i32 %a, 42 @@ -1269,9 +1186,8 @@ define i32 @add_smin_wrong_wrap(i32 %x) { ; CHECK-LABEL: @add_smin_wrong_wrap( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 %c = icmp slt i32 %a, 42 @@ -1285,9 +1201,8 @@ ; CHECK-LABEL: @add_smin_extra_use( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 ; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 store i32 %a, i32* %p @@ -1298,10 +1213,9 @@ define <2 x i16> @add_smin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_smin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP2]], -; CHECK-NEXT: ret <2 x i16> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: ret <2 x i16> [[TMP2]] ; %a = add nsw <2 x i16> %x, %c = icmp slt <2 x i16> %a, @@ -1311,10 +1225,9 @@ define i37 @add_smax(i37 %x) { ; CHECK-LABEL: @add_smax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i37 [[X:%.*]], 37 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37 -; CHECK-NEXT: [[R:%.*]] = add nuw nsw i37 [[TMP2]], 5 -; CHECK-NEXT: ret i37 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i37 @llvm.smax.i37(i37 [[X:%.*]], i37 37) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i37 [[TMP1]], 5 +; CHECK-NEXT: ret i37 [[TMP2]] ; %a = add nsw i37 %x, 5 %c = icmp sgt i37 %a, 42 @@ -1324,10 +1237,9 @@ define i8 @add_smax_constant_limit(i8 %x) { ; CHECK-LABEL: @add_smax_constant_limit( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], -127 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -127 -; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[TMP2]], 125 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 -127) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[TMP1]], 125 +; CHECK-NEXT: ret i8 [[TMP2]] ; %a = add nsw i8 %x, 125 %c = icmp sgt i8 %a, -2 @@ -1340,8 +1252,8 @@ define i8 @add_smax_simplify(i8 %x) { ; CHECK-LABEL: @add_smax_simplify( -; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[X:%.*]], 126 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 126 +; CHECK-NEXT: ret i8 [[A]] ; %a = add nsw i8 %x, 126 %c = icmp sgt i8 %a, -2 @@ -1368,9 +1280,8 @@ define i32 @add_smax_wrong_pred(i32 %x) { ; CHECK-LABEL: @add_smax_wrong_pred( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 %c = icmp ugt i32 %a, 42 @@ -1383,9 +1294,8 @@ define i32 @add_smax_wrong_wrap(i32 %x) { ; CHECK-LABEL: @add_smax_wrong_wrap( ; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nuw i32 %x, 15 %c = icmp sgt i32 %a, 42 @@ -1399,9 +1309,8 @@ ; CHECK-LABEL: @add_smax_extra_use( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 ; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add nsw i32 %x, 15 store i32 %a, i32* %p @@ -1412,10 +1321,9 @@ define <2 x i33> @add_smax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_smax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i33> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP2]], -; CHECK-NEXT: ret <2 x i33> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.smax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <2 x i33> [[TMP1]], +; CHECK-NEXT: ret <2 x i33> [[TMP2]] ; %a = add nsw <2 x i33> %x, %c = icmp sgt <2 x i33> %a, @@ -1425,10 +1333,9 @@ define i8 @PR14613_smin(i8 %x) { ; CHECK-LABEL: @PR14613_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 40 -; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 -; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 55 -; CHECK-NEXT: ret i8 [[U7]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 40) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i8 [[TMP1]], 15 +; CHECK-NEXT: ret i8 [[NARROW]] ; %u4 = sext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1440,10 +1347,9 @@ define i8 @PR14613_smax(i8 %x) { ; CHECK-LABEL: @PR14613_smax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 40 -; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 -; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 55 -; CHECK-NEXT: ret i8 [[U7]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 40) +; CHECK-NEXT: [[NARROW:%.*]] = add nuw i8 [[TMP1]], 15 +; CHECK-NEXT: ret i8 [[NARROW]] ; %u4 = sext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1471,11 +1377,9 @@ define i32 @twoway_clamp_lt(i32 %num) { ; CHECK-LABEL: @twoway_clamp_lt( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[NUM:%.*]], 13768 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], i32 [[NUM]], i32 13768 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[S1]], 13767 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP2]], i32 [[S1]], i32 13767 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[NUM:%.*]], 13767 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 13768, i32 13767 +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp1 = icmp slt i32 %num, 13768 @@ -1488,11 +1392,9 @@ define i32 @twoway_clamp_gt(i32 %num) { ; CHECK-LABEL: @twoway_clamp_gt( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[NUM:%.*]], 13767 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], i32 [[NUM]], i32 13767 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[S1]], 13768 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP2]], i32 [[S1]], i32 13768 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[NUM:%.*]], i32 13767) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 13768) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %cmp1 = icmp sgt i32 %num, 13767 @@ -1506,11 +1408,9 @@ ; CHECK-LABEL: @twoway_clamp_gt_nonconst( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[K1:%.*]] = add i32 [[K:%.*]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[NUM:%.*]], [[K]] -; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], i32 [[NUM]], i32 [[K]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[S1]], [[K1]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP2]], i32 [[S1]], i32 [[K1]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[NUM:%.*]], i32 [[K]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[K1]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: %k1 = add i32 %k, 1 diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -244,9 +244,8 @@ define double @t17(i32 %x) { ; CHECK-LABEL: @t17( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 2 -; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[SEL1]] to double +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[TMP2]] ; %cmp = icmp sgt i32 %x, 2 diff --git a/llvm/test/Transforms/InstCombine/minmax-of-minmax.ll b/llvm/test/Transforms/InstCombine/minmax-of-minmax.ll --- a/llvm/test/Transforms/InstCombine/minmax-of-minmax.ll +++ b/llvm/test/Transforms/InstCombine/minmax-of-minmax.ll @@ -3,9 +3,8 @@ define i32 @smax_of_smax_smin_commute0(i32 %x, i32 %y) { ; CHECK-LABEL: @smax_of_smax_smin_commute0( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp slt i32 %x, %y %min = select i1 %cmp1, i32 %x, i32 %y @@ -18,9 +17,8 @@ define i32 @smax_of_smax_smin_commute1(i32 %x, i32 %y) { ; CHECK-LABEL: @smax_of_smax_smin_commute1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp sgt i32 %x, %y %min = select i1 %cmp1, i32 %y, i32 %x @@ -33,9 +31,8 @@ define i32 @smax_of_smax_smin_commute2(i32 %x, i32 %y) { ; CHECK-LABEL: @smax_of_smax_smin_commute2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp slt i32 %x, %y %min = select i1 %cmp1, i32 %x, i32 %y @@ -48,9 +45,8 @@ define <2 x i32> @smax_of_smax_smin_commute3(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smax_of_smax_smin_commute3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: ret <2 x i32> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp1 = icmp sgt <2 x i32> %x, %y %min = select <2 x i1> %cmp1, <2 x i32> %y, <2 x i32> %x @@ -63,9 +59,8 @@ define i32 @smin_of_smin_smax_commute0(i32 %x, i32 %y) { ; CHECK-LABEL: @smin_of_smin_smax_commute0( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp sgt i32 %x, %y %max = select i1 %cmp1, i32 %x, i32 %y @@ -78,9 +73,8 @@ define i32 @smin_of_smin_smax_commute1(i32 %x, i32 %y) { ; CHECK-LABEL: @smin_of_smin_smax_commute1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp slt i32 %x, %y %max = select i1 %cmp1, i32 %y, i32 %x @@ -93,9 +87,8 @@ define <2 x i32> @smin_of_smin_smax_commute2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smin_of_smin_smax_commute2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: ret <2 x i32> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp1 = icmp sgt <2 x i32> %x, %y %max = select <2 x i1> %cmp1, <2 x i32> %x, <2 x i32> %y @@ -108,9 +101,8 @@ define i32 @smin_of_smin_smax_commute3(i32 %x, i32 %y) { ; CHECK-LABEL: @smin_of_smin_smax_commute3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp slt i32 %x, %y %max = select i1 %cmp1, i32 %y, i32 %x @@ -123,9 +115,8 @@ define i32 @umax_of_umax_umin_commute0(i32 %x, i32 %y) { ; CHECK-LABEL: @umax_of_umax_umin_commute0( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ult i32 %x, %y %min = select i1 %cmp1, i32 %x, i32 %y @@ -138,9 +129,8 @@ define i32 @umax_of_umax_umin_commute1(i32 %x, i32 %y) { ; CHECK-LABEL: @umax_of_umax_umin_commute1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ugt i32 %x, %y %min = select i1 %cmp1, i32 %y, i32 %x @@ -153,9 +143,8 @@ define i32 @umax_of_umax_umin_commute2(i32 %x, i32 %y) { ; CHECK-LABEL: @umax_of_umax_umin_commute2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ult i32 %x, %y %min = select i1 %cmp1, i32 %x, i32 %y @@ -168,9 +157,8 @@ define <2 x i32> @umax_of_umax_umin_commute3(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umax_of_umax_umin_commute3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: ret <2 x i32> [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp1 = icmp ugt <2 x i32> %x, %y %min = select <2 x i1> %cmp1, <2 x i32> %y, <2 x i32> %x @@ -183,9 +171,8 @@ define i32 @umin_of_umin_umax_commute0(i32 %x, i32 %y) { ; CHECK-LABEL: @umin_of_umin_umax_commute0( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ugt i32 %x, %y %max = select i1 %cmp1, i32 %x, i32 %y @@ -198,9 +185,8 @@ define i32 @umin_of_umin_umax_commute1(i32 %x, i32 %y) { ; CHECK-LABEL: @umin_of_umin_umax_commute1( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ult i32 %x, %y %max = select i1 %cmp1, i32 %y, i32 %x @@ -213,9 +199,8 @@ define <2 x i32> @umin_of_umin_umax_commute2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umin_of_umin_umax_commute2( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: ret <2 x i32> [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp1 = icmp ugt <2 x i32> %x, %y %max = select <2 x i1> %cmp1, <2 x i32> %x, <2 x i32> %y @@ -228,9 +213,8 @@ define i32 @umin_of_umin_umax_commute3(i32 %x, i32 %y) { ; CHECK-LABEL: @umin_of_umin_umax_commute3( -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[MIN]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ult i32 %x, %y %max = select i1 %cmp1, i32 %y, i32 %x @@ -245,13 +229,8 @@ define i32 @umin_of_smin_umax_wrong_pattern(i32 %x, i32 %y) { ; CHECK-LABEL: @umin_of_smin_umax_wrong_pattern( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[Y]], [[X]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[MAX]], [[MIN]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], i32 [[MIN]], i32 [[MAX]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ugt i32 %x, %y %max = select i1 %cmp1, i32 %x, i32 %y @@ -266,13 +245,10 @@ define i32 @smin_of_umin_umax_wrong_pattern2(i32 %x, i32 %y) { ; CHECK-LABEL: @smin_of_umin_umax_wrong_pattern2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP1]], i32 [[Y]], i32 [[X]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], [[Y]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[MAX]], [[MIN]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], i32 [[MIN]], i32 [[MAX]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; %cmp1 = icmp ult i32 %x, %y %max = select i1 %cmp1, i32 %y, i32 %x @@ -287,13 +263,8 @@ define <2 x i32> @umin_of_umin_umax_wrong_operand(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { ; CHECK-LABEL: @umin_of_umin_umax_wrong_operand( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> [[X]], <2 x i32> [[Y]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i32> [[X]], [[Z:%.*]] -; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[X]], <2 x i32> [[Z]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult <2 x i32> [[MIN]], [[MAX]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP3]], <2 x i32> [[MIN]], <2 x i32> [[MAX]] -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Z:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %cmp1 = icmp ugt <2 x i32> %x, %y %max = select <2 x i1> %cmp1, <2 x i32> %x, <2 x i32> %y @@ -308,13 +279,8 @@ define i32 @umin_of_umin_umax_wrong_operand2(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @umin_of_umin_umax_wrong_operand2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP1]], i32 [[Z]], i32 [[X]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[Y:%.*]], [[X]] -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[MIN]], [[MAX]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], i32 [[MIN]], i32 [[MAX]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp1 = icmp ult i32 %x, %z %max = select i1 %cmp1, i32 %z, i32 %x diff --git a/llvm/test/Transforms/InstCombine/pr21199.ll b/llvm/test/Transforms/InstCombine/pr21199.ll --- a/llvm/test/Transforms/InstCombine/pr21199.ll +++ b/llvm/test/Transforms/InstCombine/pr21199.ll @@ -9,15 +9,14 @@ define void @test(i32 %len) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[LEN:%.*]], 8 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[LEN]], i32 8 -; CHECK-NEXT: [[CMP11_NOT:%.*]] = icmp eq i32 [[COND]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.umin.i32(i32 [[LEN:%.*]], i32 8) +; CHECK-NEXT: [[CMP11_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP11_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: tail call void @f(i32 [[COND]]) +; CHECK-NEXT: tail call void @f(i32 [[TMP0]]) ; CHECK-NEXT: [[INC]] = add i32 [[I_02]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INC]], [[COND]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/pr27236.ll b/llvm/test/Transforms/InstCombine/pr27236.ll --- a/llvm/test/Transforms/InstCombine/pr27236.ll +++ b/llvm/test/Transforms/InstCombine/pr27236.ll @@ -3,10 +3,9 @@ define float @test1(i32 %scale) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SCALE:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[SCALE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[SCALE:%.*]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] ; %1 = icmp sgt i32 1, %scale %2 = select i1 %1, i32 1, i32 %scale diff --git a/llvm/test/Transforms/InstCombine/pr38897.ll b/llvm/test/Transforms/InstCombine/pr38897.ll --- a/llvm/test/Transforms/InstCombine/pr38897.ll +++ b/llvm/test/Transforms/InstCombine/pr38897.ll @@ -7,11 +7,9 @@ ; CHECK-NEXT: [[SMAX58:%.*]] = select i1 [[C:%.*]], i32 [[E:%.*]], i32 [[F:%.*]] ; CHECK-NEXT: [[SMAX59:%.*]] = select i1 [[D:%.*]], i32 [[G:%.*]], i32 [[H:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX59]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], -1 -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[SMAX58]], [[TMP12]] -; CHECK-NEXT: [[SMAX61:%.*]] = select i1 [[TMP13]], i32 [[SMAX58]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[SMAX61]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[SMAX58]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[TMP14]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/pr38915.ll b/llvm/test/Transforms/InstCombine/pr38915.ll --- a/llvm/test/Transforms/InstCombine/pr38915.ll +++ b/llvm/test/Transforms/InstCombine/pr38915.ll @@ -5,11 +5,9 @@ ; CHECK-LABEL: @PR38915( ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[M1N:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]] -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[M1N]], [[Z:%.*]] -; CHECK-NEXT: [[M2:%.*]] = select i1 [[C2]], i32 [[M1N]], i32 [[Z]] -; CHECK-NEXT: [[M2N:%.*]] = xor i32 [[M2]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP3]], i32 [[Z:%.*]]) +; CHECK-NEXT: [[M2N:%.*]] = xor i32 [[TMP4]], -1 ; CHECK-NEXT: ret i32 [[M2N]] ; %xn = sub i32 0, %x diff --git a/llvm/test/Transforms/InstCombine/pr44541.ll b/llvm/test/Transforms/InstCombine/pr44541.ll --- a/llvm/test/Transforms/InstCombine/pr44541.ll +++ b/llvm/test/Transforms/InstCombine/pr44541.ll @@ -13,9 +13,8 @@ define i16 @test(i16 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[ZERO:%.*]] = call i16 @passthru(i16 0) -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ARG:%.*]], 0 -; CHECK-NEXT: [[RET:%.*]] = select i1 [[TMP1]], i16 [[ARG]], i16 0 -; CHECK-NEXT: ret i16 [[RET]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG:%.*]], i16 0) +; CHECK-NEXT: ret i16 [[TMP1]] ; %zero = call i16 @passthru(i16 0) %sub = sub nuw nsw i16 %arg, %zero diff --git a/llvm/test/Transforms/InstCombine/pr44835.ll b/llvm/test/Transforms/InstCombine/pr44835.ll --- a/llvm/test/Transforms/InstCombine/pr44835.ll +++ b/llvm/test/Transforms/InstCombine/pr44835.ll @@ -8,8 +8,7 @@ ; CHECK-LABEL: @test( ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[P2:%.*]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[V2]], [[V]] -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP]], i32 [[V2]], i32 [[V]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[V2]], i32 [[V]]) ; CHECK-NEXT: store i32 [[TMP1]], i32* [[P]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/preserve-sminmax.ll b/llvm/test/Transforms/InstCombine/preserve-sminmax.ll --- a/llvm/test/Transforms/InstCombine/preserve-sminmax.ll +++ b/llvm/test/Transforms/InstCombine/preserve-sminmax.ll @@ -10,9 +10,8 @@ define i32 @foo(i32 %h) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[SD:%.*]] = sdiv i32 [[H:%.*]], 2 -; CHECK-NEXT: [[T:%.*]] = icmp slt i32 [[SD]], 1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[T]], i32 [[SD]], i32 1 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[SD]], i32 1) +; CHECK-NEXT: ret i32 [[TMP1]] ; %sd = sdiv i32 %h, 2 %t = icmp slt i32 %sd, 1 @@ -23,9 +22,8 @@ define i32 @bar(i32 %h) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[SD:%.*]] = sdiv i32 [[H:%.*]], 2 -; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 [[SD]], 1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[T]], i32 [[SD]], i32 1 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[SD]], i32 1) +; CHECK-NEXT: ret i32 [[TMP1]] ; %sd = sdiv i32 %h, 2 %t = icmp sgt i32 %sd, 1 diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -77,11 +77,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = mul nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -2147483648 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP0]], i64 -2147483648) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -295,11 +293,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i4 [[A:%.*]] to i32 ; CHECK-NEXT: [[CONV1:%.*]] = sext i4 [[B:%.*]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[ADD]], 7 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[ADD]], i32 7 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -8 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -8 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 7) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -8) +; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[TMP1]] to i4 ; CHECK-NEXT: ret i4 [[CONV9]] ; entry: @@ -320,11 +316,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i4 [[A:%.*]] to i32 ; CHECK-NEXT: [[CONV1:%.*]] = sext i4 [[B:%.*]] to i32 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[SUB]], 7 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SUB]], i32 7 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -8 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -8 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 7) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -8) +; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[TMP1]] to i4 ; CHECK-NEXT: ret i4 [[CONV9]] ; entry: @@ -411,11 +405,9 @@ ; CHECK-LABEL: @sadd_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[ADD]], -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[ADD]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SPEC_STORE_SELECT]], -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP0]], <4 x i32> ) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; entry: %add = add <4 x i32> %a, %b @@ -430,11 +422,9 @@ ; CHECK-LABEL: @ssub_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[ADD]], -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[ADD]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SPEC_STORE_SELECT]], -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP0]], <4 x i32> ) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; entry: %add = sub <4 x i32> %a, %b @@ -450,8 +440,8 @@ ; CHECK-LABEL: @sadd_sat32_extrause_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[B:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = sext i32 [[TMP0]] to i64 -; CHECK-NEXT: call void @use64(i64 [[SPEC_STORE_SELECT8]]) +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: call void @use64(i64 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: @@ -473,12 +463,10 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -2147483648 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 -; CHECK-NEXT: call void @use64(i64 [[SPEC_STORE_SELECT]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP0]], i64 -2147483648) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: call void @use64(i64 [[TMP0]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -523,11 +511,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -2147483648 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP0]], i64 -2147483648) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: call void @use64(i64 [[ADD]]) ; CHECK-NEXT: ret i32 [[CONV7]] ; @@ -573,11 +559,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[ADD]], 32767 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 32767 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -32768 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -32768 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 32767) +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP0]], i64 -32768) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -617,11 +601,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 ; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[ADD]], 127 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[ADD]], i32 127 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -128 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -128 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_STORE_SELECT8]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 127) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -128) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[TMP1]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; entry: @@ -642,9 +624,8 @@ ; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 2147483647 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP0]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -681,8 +662,8 @@ ; CHECK-LABEL: @sadd_sat32_notrunc( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[B:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = sext i32 [[TMP0]] to i64 -; CHECK-NEXT: ret i64 [[SPEC_STORE_SELECT8]] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; entry: %conv = sext i32 %a to i64 @@ -761,11 +742,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 31 ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[SPEC_STORE_SELECT]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 2147483647 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648) +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP0]], i64 2147483647) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[CONV7]] ; entry: @@ -806,11 +785,9 @@ ; CHECK-NEXT: [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], ; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16> ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <2 x i16> [[ADD]], -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <2 x i1> [[TMP0]], <2 x i16> [[ADD]], <2 x i16> -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> [[SPEC_STORE_SELECT]], -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> -; CHECK-NEXT: [[CONV7:%.*]] = trunc <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> ) +; CHECK-NEXT: [[CONV7:%.*]] = trunc <2 x i16> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[CONV7]] ; entry: @@ -850,8 +827,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8 ; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16 ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]]) -; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16 -; CHECK-NEXT: ret i16 [[L12]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i16 +; CHECK-NEXT: ret i16 [[TMP4]] ; %conv10 = sext i8 %X to i16 %conv14 = or i16 %Y, 65520 @@ -865,11 +842,10 @@ define i16 @const(i8 %X) { ; CHECK-LABEL: @const( -; CHECK-NEXT: [[CONV10:%.*]] = sext i8 [[X:%.*]] to i16 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[CONV10]], 117 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[CONV10]], i16 117 -; CHECK-NEXT: [[L12:%.*]] = add nsw i16 [[TMP2]], 10 -; CHECK-NEXT: ret i16 [[L12]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 117) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw i8 [[TMP1]], 10 +; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[NARROW]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] ; %conv10 = sext i8 %X to i16 %sub = add i16 %conv10, 10 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1772,9 +1772,8 @@ ; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min( ; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() ; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]] -; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[NOTY]], i32 [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[TMP1]] ; CHECK-NEXT: ret i32 [[R]] ; %y = call i32 @get_i32() ; thwart complexity-based canonicalization @@ -1791,9 +1790,8 @@ ; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value( ; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32() ; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]] -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]] -; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[NOTY]], i32 [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = add i32 [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: ret i32 [[R]] ; %y = call i32 @get_i32() ; thwart complexity-based canonicalization @@ -1832,9 +1830,8 @@ define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) { ; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant( -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42 -; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -42 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 42) +; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP1]], -42 ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp ult i32 %x, 42 diff --git a/llvm/test/Transforms/InstCombine/select-gep.ll b/llvm/test/Transforms/InstCombine/select-gep.ll --- a/llvm/test/Transforms/InstCombine/select-gep.ll +++ b/llvm/test/Transforms/InstCombine/select-gep.ll @@ -59,9 +59,8 @@ define i32* @test2(i32* %p, i64 %x, i64 %y) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i64 [[X]], i64 [[Y]] -; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[SELECT_V]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: ret i32* [[SELECT]] ; %gep1 = getelementptr inbounds i32, i32* %p, i64 %x diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll --- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll +++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll @@ -4,9 +4,8 @@ define i8 @single(i32 %A) { ; CHECK-LABEL: @single( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[CONV71:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[CONV71]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; CHECK-NEXT: ret i8 [[TMP1]] ; entry: @@ -19,12 +18,10 @@ define i8 @double(i32 %A) { ; CHECK-LABEL: @double( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 127 -; CHECK-NEXT: [[CONV71:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 127 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[CONV71]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 127) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; entry: %l1 = icmp slt i32 %A, -128 @@ -52,11 +49,9 @@ define i8 @original(i32 %A, i32 %B) { ; CHECK-LABEL: @original( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 -; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; %cmp4.i = icmp slt i32 127, %A @@ -71,11 +66,9 @@ define i8 @original_logical(i32 %A, i32 %B) { ; CHECK-LABEL: @original_logical( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 -; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: ret i8 [[CONV7]] ; %cmp4.i = icmp slt i32 127, %A diff --git a/llvm/test/Transforms/InstCombine/select-min-max.ll b/llvm/test/Transforms/InstCombine/select-min-max.ll --- a/llvm/test/Transforms/InstCombine/select-min-max.ll +++ b/llvm/test/Transforms/InstCombine/select-min-max.ll @@ -204,10 +204,9 @@ define i32 @smax_smin(i32 %x) { ; CHECK-LABEL: @smax_smin( -; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 0) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[M]], 1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[TMP1]], i32 [[M]], i32 1 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; %m = call i32 @llvm.smax.i32(i32 %x, i32 0) %c = icmp slt i32 %x, 1 @@ -218,9 +217,8 @@ define i32 @smin_smax(i32 %x) { ; CHECK-LABEL: @smin_smax( ; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 -1) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[M]], -2 -; CHECK-NEXT: [[S:%.*]] = select i1 [[TMP1]], i32 [[M]], i32 -2 -; CHECK-NEXT: ret i32 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[M]], i32 -2) +; CHECK-NEXT: ret i32 [[TMP1]] ; %m = call i32 @llvm.smin.i32(i32 %x, i32 -1) %c = icmp sgt i32 %x, -2 @@ -230,10 +228,9 @@ define i8 @umax_umin(i8 %x) { ; CHECK-LABEL: @umax_umin( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 -128) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], -127 -; CHECK-NEXT: [[S:%.*]] = select i1 [[TMP1]], i8 [[M]], i8 -127 -; CHECK-NEXT: ret i8 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -127 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 -128, i8 -127 +; CHECK-NEXT: ret i8 [[TMP2]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 128) %c = icmp ult i8 %x, 129 @@ -243,10 +240,9 @@ define i8 @umin_umax(i8 %x) { ; CHECK-LABEL: @umin_umax( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 127) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[M]], 126 -; CHECK-NEXT: [[S:%.*]] = select i1 [[TMP1]], i8 [[M]], i8 126 -; CHECK-NEXT: ret i8 [[S]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 126 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 127, i8 126 +; CHECK-NEXT: ret i8 [[TMP2]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 127) %c = icmp ugt i8 %x, 126 diff --git a/llvm/test/Transforms/InstCombine/select-pr39595.ll b/llvm/test/Transforms/InstCombine/select-pr39595.ll --- a/llvm/test/Transforms/InstCombine/select-pr39595.ll +++ b/llvm/test/Transforms/InstCombine/select-pr39595.ll @@ -1,12 +1,12 @@ -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @foo(i32 %x, i32 %y) { -; CHECK-LABEL: foo -; CHECK: [[TMP1:%.*]] = icmp ugt i32 %x, %y -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 %x, i32 %y, !prof ![[$MD0:[0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[TMP3:%.*]] -; CHECK-DAG: !0 = !{!"branch_weights", i32 6, i32 1} +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = xor i32 %x, -1 %2 = xor i32 %y, -1 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -592,9 +592,8 @@ ; SMAX(SMAX(x, y), x) -> SMAX(x, y) define i32 @test30(i32 %x, i32 %y) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, %y %cond = select i1 %cmp, i32 %x, i32 %y @@ -606,9 +605,8 @@ ; UMAX(UMAX(x, y), x) -> UMAX(x, y) define i32 @test31(i32 %x, i32 %y) { ; CHECK-LABEL: @test31( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ugt i32 %x, %y %cond = select i1 %cmp, i32 %x, i32 %y @@ -620,9 +618,8 @@ ; SMIN(SMIN(x, y), x) -> SMIN(x, y) define i32 @test32(i32 %x, i32 %y) { ; CHECK-LABEL: @test32( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[X]] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, %y %cond = select i1 %cmp, i32 %y, i32 %x @@ -1426,10 +1423,9 @@ define i32 @PR27137(i32 %a) { ; CHECK-LABEL: @PR27137( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0 -; CHECK-NEXT: [[S1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[S1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_a = xor i32 %a, -1 %c0 = icmp slt i32 %a, 0 diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll --- a/llvm/test/Transforms/InstCombine/select_meta.ll +++ b/llvm/test/Transforms/InstCombine/select_meta.ll @@ -6,7 +6,7 @@ define i32 @foo(i32) local_unnamed_addr #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0:%.*]], 2 -; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[TMP2]], i32 20, i32 -20, !prof !0 +; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[TMP2]], i32 20, i32 -20, !prof [[PROF0:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[DOTV]], [[TMP0]] ; CHECK-NEXT: ret i32 [[TMP3]] ; @@ -20,7 +20,7 @@ define i8 @shrink_select(i1 %cond, i32 %x) { ; CHECK-LABEL: @shrink_select( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 -; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[COND:%.*]], i8 [[TMP1]], i8 42, !prof !0 +; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[COND:%.*]], i8 [[TMP1]], i8 42, !prof [[PROF0]] ; CHECK-NEXT: ret i8 [[TRUNC]] ; %sel = select i1 %cond, i32 %x, i32 42, !prof !1 @@ -31,8 +31,8 @@ define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) { ; CHECK-LABEL: @min_max_bitcast( ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]], !prof !0 -; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]], !prof !0 +; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]], !prof [[PROF0]] +; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]], !prof [[PROF0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* [[PTR1:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* [[PTR2:%.*]] to <4 x float>* @@ -53,7 +53,7 @@ ; CHECK-LABEL: @foo2( ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0:%.*]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP1:%.*]] -; CHECK-NEXT: [[DOTP:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP4]], !prof !0 +; CHECK-NEXT: [[DOTP:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP4]], !prof [[PROF0]] ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[DOTP]], [[TMP0]] ; CHECK-NEXT: ret i32 [[TMP5]] ; @@ -66,10 +66,9 @@ define i64 @test43(i32 %a) nounwind { ; CHECK-LABEL: @test43( -; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A_EXT]], 0 -; CHECK-NEXT: [[MAX:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 0, !prof !0 -; CHECK-NEXT: ret i64 [[MAX]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[A:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %a_ext = sext i32 %a to i64 %is_a_nonnegative = icmp sgt i32 %a, -1 @@ -79,7 +78,7 @@ define <2 x i32> @scalar_select_of_vectors_sext(<2 x i1> %cca, i1 %ccb) { ; CHECK-LABEL: @scalar_select_of_vectors_sext( -; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> zeroinitializer, !prof !0 +; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> zeroinitializer, !prof [[PROF0]] ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[NARROW]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -91,10 +90,9 @@ define i16 @t7(i32 %a) { ; CHECK-LABEL: @t7( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], -32768 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32768, !prof !0 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 -; CHECK-NEXT: ret i16 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[A:%.*]], i32 -32768) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] ; %1 = icmp slt i32 %a, -32768 %2 = trunc i32 %a to i16 @@ -135,9 +133,8 @@ ; SMAX(SMAX(x, y), x) -> SMAX(x, y) define i32 @test30(i32 %x, i32 %y) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]], !prof !0 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, %y %cond = select i1 %cmp, i32 %x, i32 %y, !prof !1 @@ -149,9 +146,8 @@ ; SMAX(SMAX(75, X), 36) -> SMAX(X, 75) define i32 @test70(i32 %x) { ; CHECK-LABEL: @test70( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75, !prof !1 -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp slt i32 %x, 75 %cond = select i1 %cmp, i32 75, i32 %x, !prof !1 @@ -164,9 +160,8 @@ ; SMIN(SMIN(X, 92), 11) -> SMIN(X, 11) define i32 @test72(i32 %x) { ; CHECK-LABEL: @test72( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 11 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 11, !prof !2 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 11) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, 92 %cond = select i1 %cmp, i32 92, i32 %x, !prof !1 @@ -179,9 +174,9 @@ ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75) define i32 @test74(i32 %x) { ; CHECK-LABEL: @test74( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75 -; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75, !prof !2 -; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 75) +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp = icmp slt i32 %x, 36 %cond = select i1 %cmp, i32 36, i32 %x, !prof !1 @@ -193,10 +188,9 @@ ; The xor is moved after the select. The metadata remains the same because the select operands are not swapped only inverted. define i32 @smin1(i32 %x) { ; CHECK-LABEL: @smin1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof !0 -; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_x = xor i32 %x, -1 %cmp = icmp sgt i32 %x, 0 @@ -207,10 +201,9 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped and inverted. define i32 @smin2(i32 %x) { ; CHECK-LABEL: @smin2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof !1 -; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_x = xor i32 %x, -1 %cmp = icmp slt i32 %x, 0 @@ -221,10 +214,9 @@ ; The xor is moved after the select. The metadata remains the same because the select operands are not swapped only inverted. define i32 @smax1(i32 %x) { ; CHECK-LABEL: @smax1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof !0 -; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_x = xor i32 %x, -1 %cmp = icmp slt i32 %x, 0 @@ -235,10 +227,9 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped and inverted. define i32 @smax2(i32 %x) { ; CHECK-LABEL: @smax2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof !1 -; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: ret i32 [[TMP2]] ; %not_x = xor i32 %x, -1 %cmp = icmp sgt i32 %x, 0 @@ -249,9 +240,8 @@ ; The compare should change, but the metadata remains the same because the select operands are not swapped. define i32 @umin1(i32 %x) { ; CHECK-LABEL: @umin1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -2147483648 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648, !prof !0 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 -2147483648) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, -1 %sel = select i1 %cmp, i32 %x, i32 -2147483648, !prof !1 @@ -261,9 +251,8 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped. define i32 @umin2(i32 %x) { ; CHECK-LABEL: @umin2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647, !prof !1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp slt i32 %x, 0 %sel = select i1 %cmp, i32 2147483647, i32 %x, !prof !1 @@ -273,9 +262,8 @@ ; The compare should change, but the metadata remains the same because the select operands are not swapped. define i32 @umax1(i32 %x) { ; CHECK-LABEL: @umax1( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647, !prof !0 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 2147483647) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp slt i32 %x, 0 %sel = select i1 %cmp, i32 %x, i32 2147483647, !prof !1 @@ -285,9 +273,8 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped. define i32 @umax2(i32 %x) { ; CHECK-LABEL: @umax2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648, !prof !1 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 -2147483648) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp sgt i32 %x, -1 %sel = select i1 %cmp, i32 -2147483648, i32 %x, !prof !1 @@ -298,7 +285,7 @@ define i32 @not_cond(i1 %c, i32 %tv, i32 %fv) { ; CHECK-LABEL: @not_cond( -; CHECK-NEXT: [[R:%.*]] = select i1 [[C:%.*]], i32 [[FV:%.*]], i32 [[TV:%.*]], !prof !1 +; CHECK-NEXT: [[R:%.*]] = select i1 [[C:%.*]], i32 [[FV:%.*]], i32 [[TV:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i32 [[R]] ; %notc = xor i1 %c, true @@ -310,7 +297,7 @@ define <2 x i32> @not_cond_vec(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) { ; CHECK-LABEL: @not_cond_vec( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof !1 +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof [[PROF1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %notc = xor <2 x i1> %c, @@ -323,7 +310,7 @@ define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) { ; CHECK-LABEL: @not_cond_vec_undef( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof !1 +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof [[PROF1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %notc = xor <2 x i1> %c, @@ -337,5 +324,4 @@ ; CHECK: !0 = !{!"branch_weights", i32 2, i32 10} ; CHECK-NEXT: !1 = !{!"branch_weights", i32 10, i32 2} -; CHECK-NEXT: !2 = !{!"branch_weights", i32 10, i32 3} diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -295,9 +295,8 @@ define i32 @test18(i16 %x) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0 -; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[SEL]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[X:%.*]], i16 0) +; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[EXT]] ; %cmp = icmp slt i16 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/smax-icmp.ll b/llvm/test/Transforms/InstCombine/smax-icmp.ll --- a/llvm/test/Transforms/InstCombine/smax-icmp.ll +++ b/llvm/test/Transforms/InstCombine/smax-icmp.ll @@ -137,8 +137,8 @@ define i1 @ne_smax2(i32 %x, i32 %y) { ; CHECK-LABEL: @ne_smax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -166,8 +166,8 @@ define i1 @ne_smax4(i32 %a, i32 %y) { ; CHECK-LABEL: @ne_smax4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp sgt i32 %y, %x @@ -193,8 +193,8 @@ define i1 @sgt_smax2(i32 %x, i32 %y) { ; CHECK-LABEL: @sgt_smax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp sgt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -222,8 +222,8 @@ define i1 @sgt_smax4(i32 %a, i32 %y) { ; CHECK-LABEL: @sgt_smax4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp sgt i32 %y, %x diff --git a/llvm/test/Transforms/InstCombine/smin-icmp.ll b/llvm/test/Transforms/InstCombine/smin-icmp.ll --- a/llvm/test/Transforms/InstCombine/smin-icmp.ll +++ b/llvm/test/Transforms/InstCombine/smin-icmp.ll @@ -136,8 +136,8 @@ define i1 @ne_smin2(i32 %x, i32 %y) { ; CHECK-LABEL: @ne_smin2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp slt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -165,8 +165,8 @@ define i1 @ne_smin4(i32 %a, i32 %y) { ; CHECK-LABEL: @ne_smin4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp slt i32 %y, %x @@ -192,8 +192,8 @@ define i1 @slt_smin2(i32 %x, i32 %y) { ; CHECK-LABEL: @slt_smin2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp slt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -221,8 +221,8 @@ define i1 @slt_smin4(i32 %a, i32 %y) { ; CHECK-LABEL: @slt_smin4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp slt i32 %y, %x diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll --- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll @@ -12,9 +12,8 @@ define i32 @clamp255_i32(i32 %x) { ; CHECK-LABEL: @clamp255_i32( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR]], 255 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP1]], 255 ; CHECK-NEXT: ret i32 [[AND]] ; %sub = sub nsw i32 255, %x diff --git a/llvm/test/Transforms/InstCombine/sub-minmax.ll b/llvm/test/Transforms/InstCombine/sub-minmax.ll --- a/llvm/test/Transforms/InstCombine/sub-minmax.ll +++ b/llvm/test/Transforms/InstCombine/sub-minmax.ll @@ -14,8 +14,8 @@ ; CHECK-LABEL: @max_na_b_minux_na( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[NOT]], i32 [[B:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 %l0 = icmp ult i32 %not, %B @@ -85,8 +85,8 @@ ; CHECK-LABEL: @max_b_na_minus_na( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[NOT]], i32 [[B:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 %l0 = icmp ugt i32 %not, %B @@ -112,8 +112,8 @@ define i32 @max_na_bi_minux_na(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_na_bi_minux_na( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[BI:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: ret i32 [[X]] ; %B = xor i32 %Bi, -1 %not = xor i32 %A, -1 @@ -139,8 +139,8 @@ define i32 @max_bi_na_minus_na(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_bi_na_minus_na( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[BI:%.*]], i32 [[A:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]] +; CHECK-NEXT: ret i32 [[X]] ; %B = xor i32 %Bi, -1 %not = xor i32 %A, -1 @@ -166,11 +166,10 @@ define i32 @max_na_bi_minux_na_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_na_bi_minux_na_use( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32 -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 -32) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 @@ -183,11 +182,10 @@ define i32 @na_minus_max_na_bi_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @na_minus_max_na_bi_use( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32 -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 -32) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 @@ -200,11 +198,10 @@ define i32 @max_bi_na_minus_na_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_bi_na_minus_na_use( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]] -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 @@ -218,11 +215,10 @@ define i32 @na_minus_max_bi_na_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @na_minus_max_bi_na_use( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]] -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: ret i32 [[X]] ; %not = xor i32 %A, -1 @@ -238,10 +234,9 @@ define i32 @max_na_bi_minux_na_use2(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_na_bi_minux_na_use2( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[NOT]], i32 31) +; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[NOT]] +; CHECK-NEXT: call void @use32(i32 [[TMP1]]) ; CHECK-NEXT: call void @use32(i32 [[NOT]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -257,10 +252,9 @@ define i32 @na_minus_max_na_bi_use2(i32 %A, i32 %Bi) { ; CHECK-LABEL: @na_minus_max_na_bi_use2( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[NOT]], i32 31) +; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[TMP1]] +; CHECK-NEXT: call void @use32(i32 [[TMP1]]) ; CHECK-NEXT: call void @use32(i32 [[NOT]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -276,11 +270,10 @@ define i32 @max_bi_na_minus_na_use2(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_bi_na_minus_na_use2( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]] -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: call void @use32(i32 [[NOT]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -297,11 +290,10 @@ define i32 @na_minus_max_bi_na_use2(i32 %A, i32 %Bi) { ; CHECK-LABEL: @na_minus_max_bi_na_use2( ; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]] -; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]] -; CHECK-NEXT: call void @use32(i32 [[L1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]] +; CHECK-NEXT: call void @use32(i32 [[TMP2]]) ; CHECK-NEXT: call void @use32(i32 [[NOT]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -317,14 +309,13 @@ define i8 @umin_not_sub(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_not_sub( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]] -; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1 -; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP2]], [[X]] -; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP2]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1 +; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP1]], [[X]] +; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP1]], [[Y]] ; CHECK-NEXT: call void @use8(i8 [[SUBX]]) ; CHECK-NEXT: call void @use8(i8 [[SUBY]]) -; CHECK-NEXT: ret i8 [[MINXY]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 @@ -339,14 +330,13 @@ define i8 @umin_not_sub_rev(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_not_sub_rev( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]] -; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1 -; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP2]] -; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1 +; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP1]] +; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP1]] ; CHECK-NEXT: call void @use8(i8 [[SUBX]]) ; CHECK-NEXT: call void @use8(i8 [[SUBY]]) -; CHECK-NEXT: ret i8 [[MINXY]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %nx = xor i8 %x, -1 %ny = xor i8 %y, -1 @@ -361,15 +351,13 @@ define void @umin3_not_all_ops_extra_uses_invert_subs(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @umin3_not_all_ops_extra_uses_invert_subs( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]] -; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], -1 -; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP4]], [[X]] -; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP4]], [[Y]] -; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP4]], [[Z]] -; CHECK-NEXT: call void @use8(i8 [[TMP5]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Z:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP2]], [[X]] +; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP2]], [[Y]] +; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP2]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[TMP3]]) ; CHECK-NEXT: call void @use8(i8 [[XMIN]]) ; CHECK-NEXT: call void @use8(i8 [[YMIN]]) ; CHECK-NEXT: call void @use8(i8 [[ZMIN]]) diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1226,9 +1226,8 @@ define i32 @test64(i32 %x) { ; CHECK-LABEL: @test64( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i32 [[TMP1]], 1 ; CHECK-NEXT: ret i32 [[DOTNEG]] ; %1 = xor i32 %x, -1 @@ -1240,9 +1239,8 @@ define i32 @test65(i32 %x) { ; CHECK-LABEL: @test65( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -256 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -256 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -256) +; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: ret i32 [[DOTNEG]] ; %1 = xor i32 %x, -1 @@ -1254,9 +1252,8 @@ define i32 @test66(i32 %x) { ; CHECK-LABEL: @test66( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -101 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -101 -; CHECK-NEXT: [[DOTNEG:%.*]] = add nuw i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 -101) +; CHECK-NEXT: [[DOTNEG:%.*]] = add nuw i32 [[TMP1]], 1 ; CHECK-NEXT: ret i32 [[DOTNEG]] ; %1 = xor i32 %x, -1 @@ -1268,9 +1265,8 @@ define i32 @test67(i32 %x) { ; CHECK-LABEL: @test67( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 100 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 100 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 100) +; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: ret i32 [[DOTNEG]] ; %1 = xor i32 %x, -1 @@ -1283,9 +1279,8 @@ ; Check splat vectors too define <2 x i32> @test68(<2 x i32> %x) { ; CHECK-LABEL: @test68( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, @@ -1298,9 +1293,8 @@ ; And non-splat constant vectors. define <2 x i32> @test69(<2 x i32> %x) { ; CHECK-LABEL: @test69( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> -; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll --- a/llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -8,12 +8,10 @@ define i8 @testi16i8(i16 %add) { ; CHECK-LABEL: @testi16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 -; CHECK-NEXT: ret i8 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 @@ -29,12 +27,10 @@ define i32 @testi64i32(i64 %add) { ; CHECK-LABEL: @testi64i32( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[ADD:%.*]], -2147483648 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 -2147483648 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 2147483647 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 2147483647 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD:%.*]], i64 -2147483648) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP1]], i64 2147483647) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] ; %sh = lshr i64 %add, 32 %conv.i = trunc i64 %sh to i32 @@ -50,12 +46,10 @@ define i16 @testi32i16i8(i32 %add) { ; CHECK-LABEL: @testi32i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 -; CHECK-NEXT: ret i16 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD:%.*]], i32 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[TMP3]] ; %a = add i32 %add, 128 %cmp = icmp ult i32 %a, 256 @@ -68,12 +62,10 @@ define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { ; CHECK-LABEL: @testv4i32i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[ADD:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[ADD]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i16> -; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; %a = add <4 x i32> %add, %cmp = icmp ult <4 x i32> %a, @@ -86,11 +78,9 @@ define i32 @testi32i32i8(i32 %add) { ; CHECK-LABEL: @testi32i32i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD:%.*]], i32 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127) +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add i32 %add, 128 %cmp = icmp ult i32 %a, 256 @@ -103,11 +93,9 @@ define i16 @test_truncfirst(i32 %add) { ; CHECK-LABEL: @test_truncfirst( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD:%.*]] to i16 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[T]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[T]], i16 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 -; CHECK-NEXT: ret i16 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[T]], i16 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) +; CHECK-NEXT: ret i16 [[TMP2]] ; %t = trunc i32 %add to i16 %a = add i16 %t, 128 @@ -159,12 +147,10 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[ADD]], <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP2]], <4 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8> -; CHECK-NEXT: ret <4 x i8> [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i16> [[TMP2]] to <4 x i8> +; CHECK-NEXT: ret <4 x i8> [[TMP3]] ; %sh = lshr <4 x i16> %add, %conv.i = trunc <4 x i16> %sh to <4 x i8> @@ -200,12 +186,10 @@ define i8 @testi16i8_revcmp(i16 %add) { ; CHECK-LABEL: @testi16i8_revcmp( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 -; CHECK-NEXT: ret i8 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 @@ -221,12 +205,10 @@ define i8 @testi16i8_revselect(i16 %add) { ; CHECK-LABEL: @testi16i8_revselect( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 -; CHECK-NEXT: ret i8 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 diff --git a/llvm/test/Transforms/InstCombine/umax-icmp.ll b/llvm/test/Transforms/InstCombine/umax-icmp.ll --- a/llvm/test/Transforms/InstCombine/umax-icmp.ll +++ b/llvm/test/Transforms/InstCombine/umax-icmp.ll @@ -137,8 +137,8 @@ define i1 @ne_umax2(i32 %x, i32 %y) { ; CHECK-LABEL: @ne_umax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp ugt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -166,8 +166,8 @@ define i1 @ne_umax4(i32 %a, i32 %y) { ; CHECK-LABEL: @ne_umax4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp ugt i32 %y, %x @@ -193,8 +193,8 @@ define i1 @ugt_umax2(i32 %x, i32 %y) { ; CHECK-LABEL: @ugt_umax2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp ugt i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -222,8 +222,8 @@ define i1 @ugt_umax4(i32 %a, i32 %y) { ; CHECK-LABEL: @ugt_umax4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp ugt i32 %y, %x diff --git a/llvm/test/Transforms/InstCombine/umin-icmp.ll b/llvm/test/Transforms/InstCombine/umin-icmp.ll --- a/llvm/test/Transforms/InstCombine/umin-icmp.ll +++ b/llvm/test/Transforms/InstCombine/umin-icmp.ll @@ -137,8 +137,8 @@ define i1 @ne_umin2(i32 %x, i32 %y) { ; CHECK-LABEL: @ne_umin2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp ult i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -166,8 +166,8 @@ define i1 @ne_umin4(i32 %a, i32 %y) { ; CHECK-LABEL: @ne_umin4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp ult i32 %y, %x @@ -193,8 +193,8 @@ define i1 @ult_umin2(i32 %x, i32 %y) { ; CHECK-LABEL: @ult_umin2( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %cmp1 = icmp ult i32 %y, %x %sel = select i1 %cmp1, i32 %y, i32 %x @@ -222,8 +222,8 @@ define i1 @ult_umin4(i32 %a, i32 %y) { ; CHECK-LABEL: @ult_umin4( ; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP2]] ; %x = add i32 %a, 3 ; thwart complexity-based canonicalization %cmp1 = icmp ult i32 %y, %x diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll --- a/llvm/test/Transforms/InstCombine/with_overflow.ll +++ b/llvm/test/Transforms/InstCombine/with_overflow.ll @@ -570,11 +570,10 @@ define { i8, i1 } @sadd_always_overflow(i8 %x) nounwind { ; CHECK-LABEL: @sadd_always_overflow( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100 -; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100 -; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[Y]], 28 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0 -; CHECK-NEXT: ret { i8, i1 } [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 100) +; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[TMP1]], 28 +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0 +; CHECK-NEXT: ret { i8, i1 } [[TMP2]] ; %c = icmp sgt i8 %x, 100 %y = select i1 %c, i8 %x, i8 100 @@ -584,11 +583,10 @@ define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind { ; CHECK-LABEL: @ssub_always_overflow( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 29 -; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 29 -; CHECK-NEXT: [[A:%.*]] = sub nuw i8 -100, [[Y]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0 -; CHECK-NEXT: ret { i8, i1 } [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 29) +; CHECK-NEXT: [[A:%.*]] = sub nuw i8 -100, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0 +; CHECK-NEXT: ret { i8, i1 } [[TMP2]] ; %c = icmp sgt i8 %x, 29 %y = select i1 %c, i8 %x, i8 29 @@ -598,9 +596,8 @@ define { i8, i1 } @smul_always_overflow(i8 %x) nounwind { ; CHECK-LABEL: @smul_always_overflow( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100 -; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100 -; CHECK-NEXT: [[A:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[Y]], i8 2) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 100) +; CHECK-NEXT: [[A:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[TMP1]], i8 2) ; CHECK-NEXT: ret { i8, i1 } [[A]] ; %c = icmp sgt i8 %x, 100 diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -665,9 +665,8 @@ define i32 @test39(i32 %x) { ; CHECK-LABEL: @test39( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) +; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = xor i32 %x, -1 %2 = icmp sgt i32 %1, -256 @@ -679,9 +678,8 @@ define i32 @test40(i32 %x, i32 %y) { ; CHECK-LABEL: @test40( ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %cmp1 = icmp sgt i32 %notx, %y @@ -693,9 +691,8 @@ define i32 @test41(i32 %x, i32 %y) { ; CHECK-LABEL: @test41( ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %cmp1 = icmp slt i32 %notx, %y @@ -707,9 +704,8 @@ define i32 @test42(i32 %x, i32 %y) { ; CHECK-LABEL: @test42( ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %cmp1 = icmp ugt i32 %notx, %y @@ -721,9 +717,8 @@ define i32 @test43(i32 %x, i32 %y) { ; CHECK-LABEL: @test43( ; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %notx = xor i32 %x, -1 %cmp1 = icmp ult i32 %notx, %y @@ -735,9 +730,8 @@ define i32 @test44(i32 %x, i32 %y) { ; CHECK-LABEL: @test44( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -4, [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[X]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; %z = add i32 %y, 3 ; thwart complexity-based canonicalization %notx = xor i32 %x, -1 @@ -749,9 +743,8 @@ define i32 @test45(i32 %x, i32 %y) { ; CHECK-LABEL: @test45( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 [[X]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %z = xor i32 %y, -1 %notx = xor i32 %x, -1 @@ -764,9 +757,8 @@ ; Check that we work with splat vectors also. define <4 x i32> @test46(<4 x i32> %x) { ; CHECK-LABEL: @test46( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = xor <4 x i32> %x, %2 = icmp sgt <4 x i32> %1, @@ -779,10 +771,9 @@ define i32 @test47(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @test47( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[CMP1]], i32 [[NOTX]], i32 [[Y]] -; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[UMAX]], -1 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[UMAX]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[NOTX]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[TMP1]], -1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: [[RES:%.*]] = mul i32 [[ADD]], [[UMIN]] ; CHECK-NEXT: ret i32 [[RES]] ; @@ -798,9 +789,8 @@ define i32 @test48(i32 %x) { ; CHECK-LABEL: @test48( ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], -1 -; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 -1 -; CHECK-NEXT: ret i32 [[D]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 -1) +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = sub i32 -2, %x %b = icmp sgt i32 %a, 0 @@ -812,9 +802,8 @@ define <2 x i32> @test48vec(<2 x i32> %x) { ; CHECK-LABEL: @test48vec( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], -; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[D]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %a = sub <2 x i32> , %x %b = icmp sgt <2 x i32> %a, zeroinitializer @@ -826,9 +815,8 @@ define i32 @test49(i32 %x) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 0 -; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -; CHECK-NEXT: ret i32 [[D]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0) +; CHECK-NEXT: ret i32 [[TMP2]] ; %a = add i32 %x, -2 %b = icmp slt i32 %a, -1 @@ -840,9 +828,8 @@ define <2 x i32> @test49vec(<2 x i32> %x) { ; CHECK-LABEL: @test49vec( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer -; CHECK-NEXT: ret <2 x i32> [[D]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %a = add <2 x i32> %x, %b = icmp slt <2 x i32> %a, @@ -855,9 +842,8 @@ ; CHECK-LABEL: @test50( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]] -; CHECK-NEXT: ret i32 [[E]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; %a = add i32 %x, -2 %b = sub i32 -2, %y @@ -871,9 +857,8 @@ ; CHECK-LABEL: @test50vec( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]] -; CHECK-NEXT: ret <2 x i32> [[E]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %a = add <2 x i32> %x, %b = sub <2 x i32> , %y @@ -887,9 +872,8 @@ ; CHECK-LABEL: @test51( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -3, [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], -3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]] -; CHECK-NEXT: ret i32 [[E]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; %a = add i32 %x, 2 %b = sub i32 2, %y @@ -903,9 +887,8 @@ ; CHECK-LABEL: @test51vec( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]] -; CHECK-NEXT: ret <2 x i32> [[E]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %a = add <2 x i32> %x, %b = sub <2 x i32> , %y diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -20,8 +20,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]]) ; CHECK-NEXT: [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -76,8 +76,8 @@ ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -442,34 +442,32 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[L0]] -; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]] +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.smin.i32(i32 [[RESULT_08]], i32 [[L0]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[TMP5]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -499,34 +497,32 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[L0]] -; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]] +; CHECK-NEXT: [[TMP5]] = call i32 @llvm.umax.i32(i32 [[RESULT_08]], i32 [[L0]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[TMP5]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -16,43 +16,39 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI1]] -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP3]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 2147483647, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ -2147483648, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 2147483647, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ -2147483648, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[COND:%.*]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[COND9:%.*]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[MIN_0_LCSSA]], i32* [[MINP:%.*]], align 4 ; CHECK-NEXT: ret i32 [[MAX_0_LCSSA]] ; CHECK: for.body: ; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[MIN_028:%.*]] = phi i32 [ [[COND9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[MAX_027:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[MIN_028:%.*]] = phi i32 [ [[TMP9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[MAX_027:%.*]] = phi i32 [ [[TMP8]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_029]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], [[MAX_027]] -; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[TMP9]], i32 [[MAX_027]] -; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP9]], [[MIN_028]] -; CHECK-NEXT: [[COND9]] = select i1 [[CMP4]], i32 [[TMP9]], i32 [[MIN_028]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP8]] = call i32 @llvm.smax.i32(i32 [[TMP7]], i32 [[MAX_027]]) +; CHECK-NEXT: [[TMP9]] = call i32 @llvm.smin.i32(i32 [[TMP7]], i32 [[MIN_028]]) ; CHECK-NEXT: [[INC]] = add nuw i32 [[I_029]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -1027,8 +1027,8 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IV:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* @@ -1268,157 +1268,156 @@ ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] ; DISABLED_MASKED_STRIDED: pred.load.continue30: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP49]], <8 x i8> [[TMP98]]) +; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP104]], i8* [[TMP103]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP101]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP103]], i8* [[TMP102]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if31: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP105]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP107]], i8* [[TMP106]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] ; DISABLED_MASKED_STRIDED: pred.store.continue32: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if33: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP109]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP111]], i8* [[TMP110]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] ; DISABLED_MASKED_STRIDED: pred.store.continue34: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if35: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP113]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP115]], i8* [[TMP114]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] ; DISABLED_MASKED_STRIDED: pred.store.continue36: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if37: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP117]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP119]], i8* [[TMP118]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] ; DISABLED_MASKED_STRIDED: pred.store.continue38: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if39: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP121]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP123]], i8* [[TMP122]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] ; DISABLED_MASKED_STRIDED: pred.store.continue40: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if41: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP125]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP127]], i8* [[TMP126]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] ; DISABLED_MASKED_STRIDED: pred.store.continue42: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if43: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP129]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP131]], i8* [[TMP130]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], i8* [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP138]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], i8* [[TMP139]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP142]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], i8* [[TMP143]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP146]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], i8* [[TMP147]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP150]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], i8* [[TMP151]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP154]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], i8* [[TMP155]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP158]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], i8* [[TMP159]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.if59: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP162]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], i8* [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1440,18 +1439,17 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC1]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = sub <8 x i8> zeroinitializer, [[TMP5]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[TMP7]], i32 [[TMP4]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1688,157 +1686,156 @@ ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE35]] ; DISABLED_MASKED_STRIDED: pred.load.continue35: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = phi <8 x i8> [ [[TMP97]], [[PRED_LOAD_CONTINUE33]] ], [ [[TMP102]], [[PRED_LOAD_IF34]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = icmp slt <8 x i8> [[TMP54]], [[TMP103]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = select <8 x i1> [[TMP104]], <8 x i8> [[TMP103]], <8 x i8> [[TMP54]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP54]], <8 x i8> [[TMP103]]) +; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP107]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i8> [[TMP105]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP109]], i8* [[TMP108]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP104]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if36: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP111]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i8> [[TMP105]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP113]], i8* [[TMP112]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP104]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE37]] ; DISABLED_MASKED_STRIDED: pred.store.continue37: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if38: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP115]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i8> [[TMP105]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP117]], i8* [[TMP116]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP104]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE39]] ; DISABLED_MASKED_STRIDED: pred.store.continue39: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if40: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP119]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i8> [[TMP105]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP121]], i8* [[TMP120]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP104]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE41]] ; DISABLED_MASKED_STRIDED: pred.store.continue41: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if42: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP123]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i8> [[TMP105]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP125]], i8* [[TMP124]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP104]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE43]] ; DISABLED_MASKED_STRIDED: pred.store.continue43: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if44: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP127]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i8> [[TMP105]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP129]], i8* [[TMP128]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP104]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE45]] ; DISABLED_MASKED_STRIDED: pred.store.continue45: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP131]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i8> [[TMP105]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP133]], i8* [[TMP132]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP104]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE47]] ; DISABLED_MASKED_STRIDED: pred.store.continue47: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP105]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP104]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], i8* [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE49]] ; DISABLED_MASKED_STRIDED: pred.store.continue49: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = sub <8 x i8> zeroinitializer, [[TMP105]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = sub <8 x i8> zeroinitializer, [[TMP104]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP140]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i8> [[TMP138]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], i8* [[TMP141]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP137]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE51]] ; DISABLED_MASKED_STRIDED: pred.store.continue51: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP144]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP138]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], i8* [[TMP145]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP137]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE53]] ; DISABLED_MASKED_STRIDED: pred.store.continue53: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP148]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP138]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], i8* [[TMP149]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP137]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE55]] ; DISABLED_MASKED_STRIDED: pred.store.continue55: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP152]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP138]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], i8* [[TMP153]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP137]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE57]] ; DISABLED_MASKED_STRIDED: pred.store.continue57: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP156]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP138]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], i8* [[TMP157]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP137]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE59]] ; DISABLED_MASKED_STRIDED: pred.store.continue59: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if60: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP160]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP138]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], i8* [[TMP161]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP137]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE61]] ; DISABLED_MASKED_STRIDED: pred.store.continue61: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if62: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP164]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP138]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], i8* [[TMP165]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP137]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE63]] ; DISABLED_MASKED_STRIDED: pred.store.continue63: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]] ; DISABLED_MASKED_STRIDED: pred.store.if64: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP168]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP170:%.*]] = extractelement <8 x i8> [[TMP138]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP170]], i8* [[TMP169]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP167]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = extractelement <8 x i8> [[TMP137]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP169]], i8* [[TMP168]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE65]] ; DISABLED_MASKED_STRIDED: pred.store.continue65: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP171:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP171]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP170:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP170]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.body: ; DISABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[ENTRY:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] @@ -1846,15 +1843,14 @@ ; DISABLED_MASKED_STRIDED: if.then: ; DISABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 ; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP171:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 1 ; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[ADD]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP173:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[CMP_I:%.*]] = icmp slt i8 [[TMP172]], [[TMP173]] -; DISABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_I]], i8 [[TMP173]], i8 [[TMP172]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP173:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP171]], i8 [[TMP172]]) ; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[MUL]] -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], i8* [[ARRAYIDX6]], align 1 -; DISABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP173]], i8* [[ARRAYIDX6]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[TMP173]] ; DISABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[ADD]] ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], i8* [[ARRAYIDX11]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] @@ -2046,157 +2042,156 @@ ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE35]] ; ENABLED_MASKED_STRIDED: pred.load.continue35: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = phi <8 x i8> [ [[TMP97]], [[PRED_LOAD_CONTINUE33]] ], [ [[TMP102]], [[PRED_LOAD_IF34]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = icmp slt <8 x i8> [[TMP54]], [[TMP103]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = select <8 x i1> [[TMP104]], <8 x i8> [[TMP103]], <8 x i8> [[TMP54]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP54]], <8 x i8> [[TMP103]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP107]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i8> [[TMP105]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP109]], i8* [[TMP108]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP104]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; ENABLED_MASKED_STRIDED: pred.store.continue: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if36: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP111]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i8> [[TMP105]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP113]], i8* [[TMP112]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP104]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE37]] ; ENABLED_MASKED_STRIDED: pred.store.continue37: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF38:%.*]], label [[PRED_STORE_CONTINUE39:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if38: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP115]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i8> [[TMP105]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP117]], i8* [[TMP116]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP104]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE39]] ; ENABLED_MASKED_STRIDED: pred.store.continue39: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF40:%.*]], label [[PRED_STORE_CONTINUE41:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if40: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP119]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i8> [[TMP105]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP121]], i8* [[TMP120]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP104]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE41]] ; ENABLED_MASKED_STRIDED: pred.store.continue41: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF42:%.*]], label [[PRED_STORE_CONTINUE43:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if42: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP123]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i8> [[TMP105]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP125]], i8* [[TMP124]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP104]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE43]] ; ENABLED_MASKED_STRIDED: pred.store.continue43: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if44: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP127]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i8> [[TMP105]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP129]], i8* [[TMP128]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP104]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE45]] ; ENABLED_MASKED_STRIDED: pred.store.continue45: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if46: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP131]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i8> [[TMP105]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP133]], i8* [[TMP132]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP104]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE47]] ; ENABLED_MASKED_STRIDED: pred.store.continue47: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if48: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP105]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP104]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], i8* [[TMP135]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE49]] ; ENABLED_MASKED_STRIDED: pred.store.continue49: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = sub <8 x i8> zeroinitializer, [[TMP105]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = sub <8 x i8> zeroinitializer, [[TMP104]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if50: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP140]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i8> [[TMP138]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], i8* [[TMP141]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP55]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP137]], i64 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE51]] ; ENABLED_MASKED_STRIDED: pred.store.continue51: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP5]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE53:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if52: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP144]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP138]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], i8* [[TMP145]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP55]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP137]], i64 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE53]] ; ENABLED_MASKED_STRIDED: pred.store.continue53: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP5]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE55:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if54: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP148]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP138]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], i8* [[TMP149]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP55]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP137]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE55]] ; ENABLED_MASKED_STRIDED: pred.store.continue55: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP5]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE57:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if56: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP152]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP138]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], i8* [[TMP153]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP55]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP137]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE57]] ; ENABLED_MASKED_STRIDED: pred.store.continue57: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP5]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE59:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if58: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP156]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP138]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], i8* [[TMP157]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP55]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP137]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE59]] ; ENABLED_MASKED_STRIDED: pred.store.continue59: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP5]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE61:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if60: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP160]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP138]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], i8* [[TMP161]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP55]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP137]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE61]] ; ENABLED_MASKED_STRIDED: pred.store.continue61: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP5]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF62:%.*]], label [[PRED_STORE_CONTINUE63:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if62: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP164]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP138]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], i8* [[TMP165]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP55]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP137]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE63]] ; ENABLED_MASKED_STRIDED: pred.store.continue63: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i1> [[TMP5]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[PRED_STORE_IF64:%.*]], label [[PRED_STORE_CONTINUE65]] ; ENABLED_MASKED_STRIDED: pred.store.if64: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP168]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP170:%.*]] = extractelement <8 x i8> [[TMP138]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP170]], i8* [[TMP169]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i32> [[TMP55]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP167]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP169:%.*]] = extractelement <8 x i8> [[TMP137]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP169]], i8* [[TMP168]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE65]] ; ENABLED_MASKED_STRIDED: pred.store.continue65: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP171:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP171]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP170:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP170]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.body: ; ENABLED_MASKED_STRIDED-NEXT: [[IX_024:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[ENTRY:%.*]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_024]], [[CONV]] @@ -2204,15 +2199,14 @@ ; ENABLED_MASKED_STRIDED: if.then: ; ENABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP171:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[ADD]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP173:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: [[CMP_I:%.*]] = icmp slt i8 [[TMP172]], [[TMP173]] -; ENABLED_MASKED_STRIDED-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_I]], i8 [[TMP173]], i8 [[TMP172]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP172:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP173:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP171]], i8 [[TMP172]]) ; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[MUL]] -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SPEC_SELECT_I]], i8* [[ARRAYIDX6]], align 1 -; ENABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP173]], i8* [[ARRAYIDX6]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[SUB:%.*]] = sub i8 0, [[TMP173]] ; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[ADD]] ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[SUB]], i8* [[ARRAYIDX11]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] @@ -2465,157 +2459,156 @@ ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE32]] ; DISABLED_MASKED_STRIDED: pred.load.continue32: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = phi <8 x i8> [ [[TMP94]], [[PRED_LOAD_CONTINUE30]] ], [ [[TMP99]], [[PRED_LOAD_IF31]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = icmp slt <8 x i8> [[TMP51]], [[TMP100]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = select <8 x i1> [[TMP101]], <8 x i8> [[TMP100]], <8 x i8> [[TMP51]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP103]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP51]], <8 x i8> [[TMP100]]) +; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP104]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i8> [[TMP102]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP106]], i8* [[TMP105]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP103]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i8> [[TMP101]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP105]], i8* [[TMP104]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP107]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if33: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP108]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i8> [[TMP102]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP110]], i8* [[TMP109]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP107]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i8> [[TMP101]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP109]], i8* [[TMP108]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] ; DISABLED_MASKED_STRIDED: pred.store.continue34: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP111]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if35: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP112]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i8> [[TMP102]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP114]], i8* [[TMP113]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP111]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i8> [[TMP101]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP113]], i8* [[TMP112]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] ; DISABLED_MASKED_STRIDED: pred.store.continue36: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP115]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if37: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP116]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i8> [[TMP102]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP118]], i8* [[TMP117]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP115]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i8> [[TMP101]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP117]], i8* [[TMP116]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] ; DISABLED_MASKED_STRIDED: pred.store.continue38: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP119]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if39: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP120]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i8> [[TMP102]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP122]], i8* [[TMP121]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP119]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i8> [[TMP101]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP121]], i8* [[TMP120]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] ; DISABLED_MASKED_STRIDED: pred.store.continue40: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP123]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if41: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP124]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i8> [[TMP102]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP126]], i8* [[TMP125]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP123]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i8> [[TMP101]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP125]], i8* [[TMP124]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] ; DISABLED_MASKED_STRIDED: pred.store.continue42: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP127]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if43: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP128]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i8> [[TMP102]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP130]], i8* [[TMP129]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP127]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i8> [[TMP101]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP129]], i8* [[TMP128]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP131]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP132]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i8> [[TMP102]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP134]], i8* [[TMP133]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP131]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i8> [[TMP101]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP133]], i8* [[TMP132]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = sub <8 x i8> zeroinitializer, [[TMP102]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP136]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP137]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i8> [[TMP135]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP139]], i8* [[TMP138]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP136]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i8> [[TMP134]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP138]], i8* [[TMP137]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP140]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP141]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i8> [[TMP135]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP143]], i8* [[TMP142]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP140]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i8> [[TMP134]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], i8* [[TMP141]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP144]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP145]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i8> [[TMP135]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP147]], i8* [[TMP146]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP144]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], i8* [[TMP145]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP148]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP149]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i8> [[TMP135]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP151]], i8* [[TMP150]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP148]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], i8* [[TMP149]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP152]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP153]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i8> [[TMP135]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP155]], i8* [[TMP154]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP152]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], i8* [[TMP153]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP156]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP157]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i8> [[TMP135]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP159]], i8* [[TMP158]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP156]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], i8* [[TMP157]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP160]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if59: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP161]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i8> [[TMP135]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP163]], i8* [[TMP162]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP160]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], i8* [[TMP161]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP164]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.if61: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP165]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i8> [[TMP135]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP167]], i8* [[TMP166]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP164]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], i8* [[TMP165]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.continue62: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2646,18 +2639,17 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = sub <8 x i8> zeroinitializer, [[TMP8]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[TMP10]], i32 [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> [[TMP9]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP12]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Q:%.*]], i32 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP9]], i32 [[TMP6]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP11]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2905,157 +2897,156 @@ ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] ; DISABLED_MASKED_STRIDED: pred.load.continue30: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP49]], <8 x i8> [[TMP98]]) +; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP104]], i8* [[TMP103]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP101]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP103]], i8* [[TMP102]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] ; DISABLED_MASKED_STRIDED: pred.store.continue: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if31: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP105]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP107]], i8* [[TMP106]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] ; DISABLED_MASKED_STRIDED: pred.store.continue32: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if33: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP109]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP111]], i8* [[TMP110]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] ; DISABLED_MASKED_STRIDED: pred.store.continue34: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if35: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP113]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP115]], i8* [[TMP114]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] ; DISABLED_MASKED_STRIDED: pred.store.continue36: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if37: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP117]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP119]], i8* [[TMP118]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] ; DISABLED_MASKED_STRIDED: pred.store.continue38: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if39: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP121]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP123]], i8* [[TMP122]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] ; DISABLED_MASKED_STRIDED: pred.store.continue40: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if41: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP125]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP127]], i8* [[TMP126]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] ; DISABLED_MASKED_STRIDED: pred.store.continue42: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if43: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP129]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP131]], i8* [[TMP130]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP134]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], i8* [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP138]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], i8* [[TMP139]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP142]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], i8* [[TMP143]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP146]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], i8* [[TMP147]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP150]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], i8* [[TMP151]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP154]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], i8* [[TMP155]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP158]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], i8* [[TMP159]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.if59: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP162]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], i8* [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -3074,8 +3065,8 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IV:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* @@ -3084,17 +3075,16 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = sub <8 x i8> zeroinitializer, [[TMP5]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 [[TMP4]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP9]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll --- a/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll +++ b/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll @@ -40,7 +40,7 @@ ;CHECK: example1 ;CHECK: load <4 x i32> ;CHECK-NEXT: shufflevector <4 x i32> -;CHECK: select <4 x i1> +;CHECK: call <4 x i32> @llvm.smax.v4i32 ;CHECK: store <4 x i32> ;CHECK: ret define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp { diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -513,46 +513,42 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP8]] -; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP9]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP6]], <4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP13:%.*]], [[SCALAR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; CHECK: scalar.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] -; CHECK-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] +; CHECK-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP13]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP13]], [[SCALAR_RECUR]] -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 -; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] -; CHECK-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]] +; CHECK-NEXT: [[TMP11]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP11]], [[SCALAR_RECUR]] +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0) +; CHECK-NEXT: [[TMP13]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP12]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -577,8 +573,8 @@ ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 @@ -589,46 +585,39 @@ ; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> ; UNROLL-NEXT: [[TMP9:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]] ; UNROLL-NEXT: [[TMP10:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP8]] -; UNROLL-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer -; UNROLL-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], zeroinitializer -; UNROLL-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP9]], <4 x i32> zeroinitializer -; UNROLL-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> [[TMP10]], <4 x i32> zeroinitializer -; UNROLL-NEXT: [[TMP15:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP13]] -; UNROLL-NEXT: [[TMP16:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP14]] -; UNROLL-NEXT: [[TMP17]] = select <4 x i1> [[TMP15]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP13]] -; UNROLL-NEXT: [[TMP18]] = select <4 x i1> [[TMP16]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP14]] +; UNROLL-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP9]], <4 x i32> zeroinitializer) +; UNROLL-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> zeroinitializer) +; UNROLL-NEXT: [[TMP13]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP11]]) +; UNROLL-NEXT: [[TMP14]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]]) ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP18]] -; UNROLL-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP17]], <4 x i32> [[TMP18]] -; UNROLL-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]]) +; UNROLL-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP13]], <4 x i32> [[TMP14]]) +; UNROLL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP16]]) ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL: for.cond.cleanup.loopexit: -; UNROLL-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP20:%.*]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[FOR_COND_CLEANUP]] ; UNROLL: for.cond.cleanup: ; UNROLL-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; UNROLL-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; UNROLL: scalar.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] +; UNROLL-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP20]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; UNROLL-NEXT: [[TMP21]] = load i32, i32* [[ARRAYIDX]], align 4 -; UNROLL-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP21]], [[SCALAR_RECUR]] -; UNROLL-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 -; UNROLL-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 -; UNROLL-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] -; UNROLL-NEXT: [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]] +; UNROLL-NEXT: [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4 +; UNROLL-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]] +; UNROLL-NEXT: [[TMP19:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0) +; UNROLL-NEXT: [[TMP20]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP19]]) ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -6327,44 +6316,44 @@ ; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: -; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE9]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE9]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE9]] ] +; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE10]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE10]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE10]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-VF-NEXT: [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1 -; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION4:%.*]] = add i32 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION3]], [[TRIP_COUNT_MINUS_1]] -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION4]], [[TRIP_COUNT_MINUS_1]] +; UNROLL-NO-VF-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 +; UNROLL-NO-VF-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]] +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], [[TRIP_COUNT_MINUS_1]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.udiv.if: ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]] ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] ; UNROLL-NO-VF: pred.udiv.if7: ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]] -; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE7]] +; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-VF: pred.udiv.continue8: -; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF6]] ] +; UNROLL-NO-VF-NEXT: [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF7]] ] ; UNROLL-NO-VF-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]] ; UNROLL-NO-VF-NEXT: [[TMP9]] = add i32 [[VEC_PHI5]], [[TMP5]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.store.if: -; UNROLL-NO-VF: [[SUNK_IND0:%.+]] = add i32 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[SUNK_IND0]] +; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION3]] ; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION]], i32* [[TMP10]], align 4 ; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NO-VF: pred.store.continue: -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] ; UNROLL-NO-VF: pred.store.if9: -; UNROLL-NO-VF-NEXT: [[SUNK_IND1:%.+]] = add i32 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[SUNK_IND1]] +; UNROLL-NO-VF-NEXT: [[INDUCTION4:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION4]] ; UNROLL-NO-VF-NEXT: store i32 [[INDUCTION2]], i32* [[TMP11]], align 4 -; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE9]] +; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE10]] ; UNROLL-NO-VF: pred.store.continue10: ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI5]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -1173,7 +1173,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1216,12 +1216,11 @@ ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP25]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]] +; CHECK-NEXT: [[TMP26]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1229,7 +1228,7 @@ ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -1261,7 +1260,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1304,12 +1303,11 @@ ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP25]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]] +; CHECK-NEXT: [[TMP26]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1317,7 +1315,7 @@ ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -657,16 +657,15 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP2]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -674,7 +673,7 @@ ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -705,16 +704,15 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP2]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -722,7 +720,7 @@ ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -834,7 +834,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -875,22 +875,21 @@ ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]] -; CHECK-NEXT: [[TMP26]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) +; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP26]]) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -921,7 +920,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -962,22 +961,21 @@ ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]] -; CHECK-NEXT: [[TMP26]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) +; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP26]]) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll @@ -36,14 +36,13 @@ ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[WIDE_LOAD16]] to <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <8 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[NEXT_GEP14]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP5]], <8 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[NEXT_GEP14]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP7]], <8 x i16>* [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER17]] @@ -59,16 +58,15 @@ ; CHECK-NEXT: [[PDST_ADDR_05:%.*]] = phi i16* [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_05_PH]], [[WHILE_BODY_PREHEADER17]] ] ; CHECK-NEXT: [[PSRCB_ADDR_04:%.*]] = phi i16* [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_04_PH]], [[WHILE_BODY_PREHEADER17]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_06]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[PSRCA_ADDR_06]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[PSRCA_ADDR_06]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 ; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_04]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[PSRCB_ADDR_04]], align 2 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[PSRCB_ADDR_04]], align 2 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15 -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[SHR]], 32767 -; CHECK-NEXT: [[RETVAL_1_I:%.*]] = select i1 [[TMP13]], i32 [[SHR]], i32 32767 -; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[RETVAL_1_I]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[TMP12]] to i16 ; CHECK-NEXT: [[INCDEC_PTR4]] = getelementptr inbounds i16, i16* [[PDST_ADDR_05]], i32 1 ; CHECK-NEXT: store i16 [[CONV3]], i16* [[PDST_ADDR_05]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_07]], -1 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll @@ -83,8 +83,7 @@ ; CHECK-LABEL: @max_v4i32( ; CHECK-NEXT: [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32> ; CHECK-NEXT: [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32> -; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]] -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll b/llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll --- a/llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll +++ b/llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll @@ -7,18 +7,9 @@ ; sub (smax a,b), (smax a,b) --> 0 -; FIXME: We should canonicalize min/max to a form -; where the cmp operands match the select operands. - define i8 @smax_nsw(i8 %a, i8 %b) { ; CHECK-LABEL: @smax_nsw( -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], [[B]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[SUB]], 0 -; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 0, i8 [[SUB]] -; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[SUB]], i8 0 -; CHECK-NEXT: [[R:%.*]] = sub nsw i8 [[M2]], [[M1]] -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %sub = sub nsw i8 %a, %b %cmp1 = icmp slt i8 %a, %b diff --git a/llvm/test/Transforms/PhaseOrdering/minmax.ll b/llvm/test/Transforms/PhaseOrdering/minmax.ll --- a/llvm/test/Transforms/PhaseOrdering/minmax.ll +++ b/llvm/test/Transforms/PhaseOrdering/minmax.ll @@ -10,15 +10,13 @@ define void @cmyk(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @cmyk( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[R]], i8 [[B]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], [[G:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i8 [[TMP1]], i8 [[G]] -; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], -1 -; CHECK-NEXT: [[SUB31:%.*]] = sub i8 [[TMP3]], [[R]] -; CHECK-NEXT: [[SUB35:%.*]] = sub i8 [[TMP3]], [[G]] -; CHECK-NEXT: [[SUB39:%.*]] = sub i8 [[TMP3]], [[B]] -; CHECK-NEXT: call void @use(i8 [[SUB31]], i8 [[SUB35]], i8 [[SUB39]], i8 [[TMP4]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP0]]) +; CHECK-NEXT: [[K_0:%.*]] = xor i8 [[TMP1]], -1 +; CHECK-NEXT: [[SUB31:%.*]] = sub i8 [[TMP1]], [[R]] +; CHECK-NEXT: [[SUB35:%.*]] = sub i8 [[TMP1]], [[G]] +; CHECK-NEXT: [[SUB39:%.*]] = sub i8 [[TMP1]], [[B]] +; CHECK-NEXT: call void @use(i8 [[SUB31]], i8 [[SUB35]], i8 [[SUB39]], i8 [[K_0]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/pr36760.ll b/llvm/test/Transforms/PhaseOrdering/pr36760.ll --- a/llvm/test/Transforms/PhaseOrdering/pr36760.ll +++ b/llvm/test/Transforms/PhaseOrdering/pr36760.ll @@ -5,9 +5,8 @@ define i64 @PR36760(i64 %a) { ; CHECK-LABEL: @PR36760( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[A:%.*]], 0 -; CHECK-NEXT: [[DOTA:%.*]] = select i1 [[TMP0]], i64 [[A]], i64 0 -; CHECK-NEXT: ret i64 [[DOTA]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.smax.i64(i64 [[A:%.*]], i64 0) +; CHECK-NEXT: ret i64 [[TMP0]] ; entry: %retval = alloca i64, align 8 @@ -37,10 +36,9 @@ define i64 @PR36760_2(i64 %a) #0 { ; CHECK-LABEL: @PR36760_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[A]], i64 -1 -; CHECK-NEXT: [[RETVAL_0:%.*]] = xor i64 [[TMP1]], -1 -; CHECK-NEXT: ret i64 [[RETVAL_0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.smin.i64(i64 [[A:%.*]], i64 -1) +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], -1 +; CHECK-NEXT: ret i64 [[TMP1]] ; entry: %retval = alloca i64, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll @@ -13,10 +13,9 @@ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP5]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> ) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; %4 = load i32, i32* %0, align 4, !tbaa !2 @@ -58,11 +57,10 @@ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP12]], <4 x i8>* [[TMP13]], align 1, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> ) +; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP11]], <4 x i8>* [[TMP12]], align 1, !tbaa [[TBAA4]] ; CHECK-NEXT: ret void ; %4 = load i8, i8* %0, align 1, !tbaa !6