diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2468,6 +2468,102 @@ // icmp X, undef -> true/false because undef could be X. if (N1 == N2) return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + + KnownBits KnownRHS = computeKnownBits(N2); + if (!KnownRHS.isUnknown()) { + KnownBits KnownLHS = computeKnownBits(N1); + std::optional<bool> Res; + // Check if we can constant fold this with knownbits. + switch (Cond) { + case ISD::SETEQ: + Res = KnownBits::eq(KnownLHS, KnownRHS); + break; + case ISD::SETNE: + Res = KnownBits::ne(KnownLHS, KnownRHS); + break; + case ISD::SETLT: + Res = KnownBits::slt(KnownLHS, KnownRHS); + break; + case ISD::SETULT: + Res = KnownBits::ult(KnownLHS, KnownRHS); + break; + case ISD::SETGT: + Res = KnownBits::sgt(KnownLHS, KnownRHS); + break; + case ISD::SETUGT: + Res = KnownBits::ugt(KnownLHS, KnownRHS); + break; + case ISD::SETLE: + Res = KnownBits::sle(KnownLHS, KnownRHS); + break; + case ISD::SETULE: + Res = KnownBits::ule(KnownLHS, KnownRHS); + break; + case ISD::SETGE: + Res = KnownBits::sge(KnownLHS, KnownRHS); + break; + case ISD::SETUGE: + Res = KnownBits::uge(KnownLHS, KnownRHS); + break; + default: + break; + } + + if (Res) + return getBoolConstant(*Res, dl, VT, OpVT); + + // We aren't able to constant fold with known bits but can either 1) make + // conditions stronger (i.e ule -> ult) or 2) simplify with + // isKnownNeverZero if RHS is zero. + switch (Cond) { + case ISD::SETLE: + case ISD::SETULE: + case ISD::SETGE: + case ISD::SETUGE: + Res = KnownBits::eq(KnownLHS, KnownRHS); + [[fallthrough]]; + case ISD::SETEQ: + case ISD::SETNE: + // isKnownNeverZero is able to prove cases computeKnownBits can't. 
+ if (!Res && KnownRHS.isZero() && isKnownNeverZero(N1)) + Res = false; + break; + default: + break; + } + + if (Res) { + assert(*Res == false && + "There is a bug in KnownBits::{sge,uge,sle,ule}"); + ISD::CondCode NewCond = Cond; + // NB: We could remove this switch and just do `Cond ^ ISD::SETEQ` for + // the new opcode. + switch (Cond) { + // Remove the or eq portion of the condition. + case ISD::SETULE: + NewCond = ISD::SETULT; + break; + case ISD::SETLE: + NewCond = ISD::SETLT; + break; + case ISD::SETUGE: + NewCond = ISD::SETUGT; + break; + case ISD::SETGE: + NewCond = ISD::SETGT; + break; + // Evaluate to true/false. + case ISD::SETNE: + return getBoolConstant(true, dl, VT, OpVT); + case ISD::SETEQ: + return getBoolConstant(false, dl, VT, OpVT); + default: + break; + } + if (Cond != NewCond) + return getSetCC(dl, VT, N1, N2, NewCond); + } + } } if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -20,10 +20,7 @@ define i8 @test2(i32 %a) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 -; CHECK-NEXT: and w8, w0, w8 -; CHECK-NEXT: cmp w8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i32 %a, 135 @@ -37,7 +34,7 @@ define i8 @test3(i32 %a) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: cmp w8, #1024 @@ -68,10 +65,7 @@ define i8 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and x8, x0, #0x3ffffc000 -; CHECK-NEXT: and x8, x8, #0xfffffffe00007fff -; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, 
wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 8589950976 @@ -84,10 +78,7 @@ define i8 @test6(i64 %a) { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 -; CHECK-NEXT: and x8, x0, x8 -; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 135 @@ -101,7 +92,7 @@ define i8 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: cmp x8, #1024 @@ -175,7 +166,7 @@ ; CHECK-NEXT: cmp w2, #1 ; CHECK-NEXT: b.lt .LBB8_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: movk w9, #32, lsl #16 ; CHECK-NEXT: .LBB8_2: // %for.body @@ -226,7 +217,7 @@ ; CHECK-LABEL: test10: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr w8, [x1] -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: movk w9, #32, lsl #16 ; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: str w8, [x0] @@ -253,10 +244,7 @@ define i8 @test11(i64 %a) { ; CHECK-LABEL: test11: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1610612736 -; CHECK-NEXT: and x8, x0, x8 -; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 2684354560 diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll --- a/llvm/test/CodeGen/AArch64/andcompare.ll +++ b/llvm/test/CodeGen/AArch64/andcompare.ll @@ -2451,7 +2451,7 @@ ; ; GISEL-LABEL: cmp_to_ands3: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #23 +; GISEL-NEXT: mov w8, #23 // =0x17 ; GISEL-NEXT: and w8, w0, w8 ; GISEL-NEXT: cmp w8, #7 ; GISEL-NEXT: csel w0, w1, wzr, hi @@ -2466,8 +2466,8 @@ ; SDISEL-LABEL: cmp_to_ands4: ; SDISEL: // %bb.0: ; SDISEL-NEXT: and w8, w0, #0x30 -; SDISEL-NEXT: tst w0, #0x20 -; 
SDISEL-NEXT: csel w0, w8, w1, eq +; SDISEL-NEXT: cmp w8, #31 +; SDISEL-NEXT: csel w0, w8, w1, lo ; SDISEL-NEXT: ret ; ; GISEL-LABEL: cmp_to_ands4: diff --git a/llvm/test/CodeGen/AArch64/cmp-const-max.ll b/llvm/test/CodeGen/AArch64/cmp-const-max.ll --- a/llvm/test/CodeGen/AArch64/cmp-const-max.ll +++ b/llvm/test/CodeGen/AArch64/cmp-const-max.ll @@ -1,11 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -verify-machineinstrs -aarch64-enable-atomic-cfg-tidy=0 < %s -mtriple=aarch64-none-eabihf -fast-isel=false | FileCheck %s define i32 @ule_64_max(i64 %p) { -entry: ; CHECK-LABEL: ule_64_max: -; CHECK: cmn x0, #1 -; CHECK: b.hi [[RET_ZERO:.LBB[0-9]+_[0-9]+]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbnz wzr, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %ret_one +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %ret_zero +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: %cmp = icmp ule i64 %p, 18446744073709551615 ; 0xffffffffffffffff br i1 %cmp, label %ret_one, label %ret_zero @@ -13,16 +20,21 @@ ret i32 1 ret_zero: -; CHECK: [[RET_ZERO]]: -; CHECK-NEXT: mov w0, wzr ret i32 0 } define i32 @ugt_64_max(i64 %p) { -entry: ; CHECK-LABEL: ugt_64_max: -; CHECK: cmn x0, #1 -; CHECK: b.ls [[RET_ZERO:.LBB[0-9]+_[0-9]+]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cbnz w8, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %ret_one +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %ret_zero +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: %cmp = icmp ugt i64 %p, 18446744073709551615 ; 0xffffffffffffffff br i1 %cmp, label %ret_one, label %ret_zero @@ -30,7 +42,5 @@ ret i32 1 ret_zero: -; CHECK: [[RET_ZERO]]: -; CHECK-NEXT: mov w0, wzr ret i32 0 } diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- 
a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -289,7 +289,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_x_is_const_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43605 +; CHECK-NEXT: mov w8, #43605 // =0xaa55 ; CHECK-NEXT: movk w8, #43605, lsl #16 ; CHECK-NEXT: lsl w8, w8, w0 ; CHECK-NEXT: tst w8, #0x1 @@ -303,8 +303,8 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_x_is_const2_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: mov w9, #43605 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #43605 // =0xaa55 ; CHECK-NEXT: lsl w8, w8, w0 ; CHECK-NEXT: movk w9, #43605, lsl #16 ; CHECK-NEXT: tst w8, w9 @@ -319,7 +319,7 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_slt: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 +; CHECK-NEXT: mov w8, #24 // =0x18 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w8, w1 ; CHECK-NEXT: and w8, w8, w0 @@ -334,13 +334,7 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: and w8, w8, #0x80 -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x diff --git a/llvm/test/CodeGen/AArch64/pr59902.ll b/llvm/test/CodeGen/AArch64/pr59902.ll --- a/llvm/test/CodeGen/AArch64/pr59902.ll +++ b/llvm/test/CodeGen/AArch64/pr59902.ll @@ -6,13 +6,7 @@ define i1 @test() { ; CHECK-LABEL: test: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9007199254740990 -; CHECK-NEXT: movk x8, #65503, lsl #16 -; CHECK-NEXT: movk x8, #65407, lsl #32 -; CHECK-NEXT: cmp x8, x8 
-; CHECK-NEXT: csel x9, x8, x8, gt -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %1 = select i1 false, i64 0, i64 9006649496829950 %2 = call i64 @llvm.smax.i64(i64 %1, i64 9006649496829950) diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -20,7 +20,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI1_0 @@ -39,7 +39,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI2_0 @@ -58,7 +58,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; CHECK-LABEL: t2_narrow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: dup v1.8h, w8 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h @@ -76,7 +76,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-LABEL: t3_wide: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: movk x8, #43691 ; CHECK-NEXT: mov x10, v0.d[1] diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -204,10 +204,9 @@ define i32 @f13(i32 %x, i32 %y) { ; CHECK-LABEL: f13: ; CHECK: @ %bb.0: -; CHECK-NEXT: and r2, r0, #4 -; CHECK-NEXT: bic r0, r1, #255 -; CHECK-NEXT: 
cmp r2, #42 -; CHECK-NEXT: orrne r0, r0, #16 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, #16 +; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: bx lr %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 diff --git a/llvm/test/CodeGen/ARM/cmp-peephole.ll b/llvm/test/CodeGen/ARM/cmp-peephole.ll --- a/llvm/test/CodeGen/ARM/cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/cmp-peephole.ll @@ -137,23 +137,17 @@ define i1 @cmp_ne_zero_or_ri(i32 %a) { ; ARM-LABEL: cmp_ne_zero_or_ri: ; ARM: @ %bb.0: -; ARM-NEXT: orrs r0, r0, #42 -; ARM-NEXT: movwne r0, #1 +; ARM-NEXT: mov r0, #1 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_ne_zero_or_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #42 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: sbcs r0, r1 +; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_ne_zero_or_ri: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: orrs r0, r0, #42 -; THUMB2-NEXT: it ne -; THUMB2-NEXT: movne r0, #1 +; THUMB2-NEXT: movs r0, #1 ; THUMB2-NEXT: bx lr %or = or i32 %a, 42 %res = icmp ne i32 %or, 0 @@ -726,10 +720,7 @@ ; ; THUMB-LABEL: cmp_eq_zero_or_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #42 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: rsbs r1, r0, #0 -; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: movs r0, #0 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_eq_zero_or_ri: @@ -1265,9 +1256,9 @@ define void @br_on_binop_lt_zero(i32 %a, i32 %b) { ; ARM-LABEL: br_on_binop_lt_zero: ; ARM: @ %bb.0: -; ARM-NEXT: orr r1, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: cmp r1, #0 -; ARM-NEXT: bxhs lr +; ARM-NEXT: bxne lr ; ARM-NEXT: .LBB46_1: @ %true_br ; ARM-NEXT: push {r11, lr} ; ARM-NEXT: bl consume @@ -1277,9 +1268,9 @@ ; THUMB-LABEL: br_on_binop_lt_zero: ; THUMB: @ %bb.0: ; THUMB-NEXT: push {r7, lr} -; THUMB-NEXT: orrs r1, r0 +; THUMB-NEXT: movs r1, #1 ; THUMB-NEXT: cmp r1, #0 -; THUMB-NEXT: bhs .LBB46_2 +; THUMB-NEXT: bne .LBB46_2 ; THUMB-NEXT: @ %bb.1: @ %true_br ; THUMB-NEXT: bl consume ; THUMB-NEXT: .LBB46_2: @ %exit @@ -1287,10 +1278,10 @@ 
; ; THUMB2-LABEL: br_on_binop_lt_zero: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: orrs r1, r0 +; THUMB2-NEXT: movs r1, #1 ; THUMB2-NEXT: cmp r1, #0 -; THUMB2-NEXT: it hs -; THUMB2-NEXT: bxhs lr +; THUMB2-NEXT: it ne +; THUMB2-NEXT: bxne lr ; THUMB2-NEXT: .LBB46_1: @ %true_br ; THUMB2-NEXT: push {r7, lr} ; THUMB2-NEXT: bl consume @@ -1570,9 +1561,9 @@ define void @br_on_shift_lt_zero(i32 %a, i32 %b) { ; ARM-LABEL: br_on_shift_lt_zero: ; ARM: @ %bb.0: -; ARM-NEXT: asr r1, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: cmp r1, #0 -; ARM-NEXT: bxhs lr +; ARM-NEXT: bxne lr ; ARM-NEXT: .LBB53_1: @ %true_br ; ARM-NEXT: push {r11, lr} ; ARM-NEXT: bl consume @@ -1582,10 +1573,9 @@ ; THUMB-LABEL: br_on_shift_lt_zero: ; THUMB: @ %bb.0: ; THUMB-NEXT: push {r7, lr} -; THUMB-NEXT: mov r2, r0 -; THUMB-NEXT: asrs r2, r1 -; THUMB-NEXT: cmp r2, #0 -; THUMB-NEXT: bhs .LBB53_2 +; THUMB-NEXT: movs r1, #1 +; THUMB-NEXT: cmp r1, #0 +; THUMB-NEXT: bne .LBB53_2 ; THUMB-NEXT: @ %bb.1: @ %true_br ; THUMB-NEXT: bl consume ; THUMB-NEXT: .LBB53_2: @ %exit @@ -1593,10 +1583,10 @@ ; ; THUMB2-LABEL: br_on_shift_lt_zero: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: asr.w r1, r0, r1 +; THUMB2-NEXT: movs r1, #1 ; THUMB2-NEXT: cmp r1, #0 -; THUMB2-NEXT: it hs -; THUMB2-NEXT: bxhs lr +; THUMB2-NEXT: it ne +; THUMB2-NEXT: bxne lr ; THUMB2-NEXT: .LBB53_1: @ %true_br ; THUMB2-NEXT: push {r7, lr} ; THUMB2-NEXT: bl consume diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -966,48 +966,15 @@ ;------------------------------------------------------------------------------; define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { -; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mov r2, #24 -; ARM6-NEXT: ands 
r0, r0, r2, lsr r1 -; ARM6-NEXT: mov r0, #0 -; ARM6-NEXT: movmi r0, #1 -; ARM6-NEXT: bx lr -; -; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mov r2, #24 -; ARM78-NEXT: ands r0, r0, r2, lsr r1 -; ARM78-NEXT: mov r0, #0 -; ARM78-NEXT: movwmi r0, #1 -; ARM78-NEXT: bx lr -; -; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: bmi .LBB20_2 -; THUMB6-NEXT: @ %bb.1: -; THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bx lr -; THUMB6-NEXT: .LBB20_2: -; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: bx lr +; ARM-LABEL: negative_scalar_i8_bitsinmiddle_slt: +; ARM: @ %bb.0: +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr ; -; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: mov.w r0, #0 -; THUMB78-NEXT: it mi -; THUMB78-NEXT: movmi r0, #1 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: negative_scalar_i8_bitsinmiddle_slt: +; THUMB: @ %bb.0: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr %t0 = lshr i8 24, %y %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -1067,23 +1067,10 @@ ; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; -; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: rsbs r0, r1, #0 -; THUMB6-NEXT: adcs 
r0, r1 -; THUMB6-NEXT: bx lr -; -; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r0, #0 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: scalar_i8_signbit_eq_with_nonzero: +; THUMB: @ %bb.0: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -1,13 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7 ; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefix=V8 define i32 @f(i32 %a, i32 %b) nounwind ssp { +; V8-LABEL: f: +; V8: @ %bb.0: @ %entry +; V8-NEXT: subs r0, r0, r1 +; V8-NEXT: movle r0, #0 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: f: -; CHECK: subs -; CHECK-NOT: cmp %cmp = icmp sgt i32 %a, %b %sub = sub nsw i32 %a, %b %sub. = select i1 %cmp, i32 %sub, i32 0 @@ -15,10 +18,12 @@ } define i32 @g(i32 %a, i32 %b) nounwind ssp { +; V8-LABEL: g: +; V8: @ %bb.0: @ %entry +; V8-NEXT: subs r0, r1, r0 +; V8-NEXT: movle r0, #0 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: g: -; CHECK: subs -; CHECK-NOT: cmp %cmp = icmp slt i32 %a, %b %sub = sub nsw i32 %b, %a %sub. = select i1 %cmp, i32 %sub, i32 0 @@ -26,10 +31,12 @@ } define i32 @h(i32 %a, i32 %b) nounwind ssp { +; V8-LABEL: h: +; V8: @ %bb.0: @ %entry +; V8-NEXT: subs r0, r0, #3 +; V8-NEXT: movle r0, r1 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: h: -; CHECK: subs -; CHECK-NOT: cmp %cmp = icmp sgt i32 %a, 3 %sub = sub nsw i32 %a, 3 %sub. 
= select i1 %cmp, i32 %sub, i32 %b @@ -38,10 +45,12 @@ ; rdar://11725965 define i32 @i(i32 %a, i32 %b) nounwind readnone ssp { +; V8-LABEL: i: +; V8: @ %bb.0: @ %entry +; V8-NEXT: subs r0, r1, r0 +; V8-NEXT: movls r0, #0 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: i: -; CHECK: subs -; CHECK-NOT: cmp %cmp = icmp ult i32 %a, %b %sub = sub i32 %b, %a %sub. = select i1 %cmp, i32 %sub, i32 0 @@ -50,10 +59,13 @@ ; If CPSR is live-out, we can't remove cmp if there exists ; a swapped sub. define i32 @j(i32 %a, i32 %b) nounwind { +; V8-LABEL: j: +; V8: @ %bb.0: @ %entry +; V8-NEXT: subs r1, r0, r1 +; V8-NEXT: movlt r0, r1 +; V8-NEXT: movne r0, r1 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: j: -; CHECK: sub -; CHECK: cmp %cmp = icmp eq i32 %b, %a %sub = sub nsw i32 %a, %b br i1 %cmp, label %if.then, label %if.else @@ -74,6 +86,16 @@ ; CHECK: rsbeq ; CHECK: cmp define i32 @bc_raise(i1 %cond) nounwind ssp { +; V8-LABEL: bc_raise: +; V8: @ %bb.0: @ %entry +; V8-NEXT: mov r1, #1 +; V8-NEXT: tst r0, #1 +; V8-NEXT: bic r1, r1, r0 +; V8-NEXT: mov r0, #23 +; V8-NEXT: rsbeq r1, r1, #0 +; V8-NEXT: cmp r1, #0 +; V8-NEXT: movweq r0, #17 +; V8-NEXT: bx lr entry: %val.2.i = select i1 %cond, i32 0, i32 1 %sub.i = sub nsw i32 0, %val.2.i @@ -91,10 +113,14 @@ ; When considering the producer of cmp's src as the subsuming instruction, ; only consider that when the comparison is to 0. 
define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) { +; V8-LABEL: cmp_src_nonzero: +; V8: @ %bb.0: @ %entry +; V8-NEXT: sub r0, r0, r1 +; V8-NEXT: cmp r0, #17 +; V8-NEXT: movne r2, r3 +; V8-NEXT: mov r0, r2 +; V8-NEXT: bx lr entry: -; CHECK-LABEL: cmp_src_nonzero: -; CHECK: sub -; CHECK: cmp %sub = sub i32 %a, %b %cmp = icmp eq i32 %sub, 17 %ret = select i1 %cmp, i32 %x, i32 %y @@ -102,12 +128,15 @@ } define float @float_sel(i32 %a, i32 %b, float %x, float %y) { -entry: -; CHECK-LABEL: float_sel: -; CHECK-NOT: cmp ; V8-LABEL: float_sel: -; V8-NOT: cmp -; V8: vseleq.f32 +; V8: @ %bb.0: @ %entry +; V8-NEXT: vmov s0, r3 +; V8-NEXT: subs r0, r0, r1 +; V8-NEXT: vmov s2, r2 +; V8-NEXT: vseleq.f32 s0, s2, s0 +; V8-NEXT: vmov r0, s0 +; V8-NEXT: bx lr +entry: %sub = sub i32 %a, %b %cmp = icmp eq i32 %sub, 0 %ret = select i1 %cmp, float %x, float %y @@ -115,12 +144,15 @@ } define double @double_sel(i32 %a, i32 %b, double %x, double %y) { -entry: -; CHECK-LABEL: double_sel: -; CHECK-NOT: cmp ; V8-LABEL: double_sel: -; V8-NOT: cmp -; V8: vseleq.f64 +; V8: @ %bb.0: @ %entry +; V8-NEXT: vldr d16, [sp] +; V8-NEXT: vmov d17, r2, r3 +; V8-NEXT: subs r0, r0, r1 +; V8-NEXT: vseleq.f64 d16, d17, d16 +; V8-NEXT: vmov r0, r1, d16 +; V8-NEXT: bx lr +entry: %sub = sub i32 %a, %b %cmp = icmp eq i32 %sub, 0 %ret = select i1 %cmp, double %x, double %y @@ -129,12 +161,21 @@ @t = common global i32 0 define double @double_sub(i32 %a, i32 %b, double %x, double %y) { -entry: -; CHECK-LABEL: double_sub: -; CHECK: subs -; CHECK-NOT: cmp ; V8-LABEL: double_sub: -; V8: vsel +; V8: @ %bb.0: @ %entry +; V8-NEXT: vldr d16, [sp] +; V8-NEXT: cmp r0, r1 +; V8-NEXT: vmov d17, r2, r3 +; V8-NEXT: sub r0, r0, r1 +; V8-NEXT: vselgt.f64 d16, d17, d16 +; V8-NEXT: movw r1, :lower16:t +; V8-NEXT: vmov r2, r3, d16 +; V8-NEXT: movt r1, :upper16:t +; V8-NEXT: str r0, [r1] +; V8-NEXT: mov r0, r2 +; V8-NEXT: mov r1, r3 +; V8-NEXT: bx lr +entry: %cmp = icmp sgt i32 %a, %b %sub = sub i32 %a, %b store i32 %sub, 
ptr @t @@ -143,14 +184,21 @@ } define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) { -entry: -; V7-LABEL: double_sub_swap: -; V7-NOT: cmp -; V7: subs ; V8-LABEL: double_sub_swap: -; V8-NOT: subs -; V8: cmp -; V8: vsel +; V8: @ %bb.0: @ %entry +; V8-NEXT: vldr d16, [sp] +; V8-NEXT: cmp r1, r0 +; V8-NEXT: vmov d17, r2, r3 +; V8-NEXT: sub r0, r1, r0 +; V8-NEXT: vselge.f64 d16, d16, d17 +; V8-NEXT: movw r1, :lower16:t +; V8-NEXT: vmov r2, r3, d16 +; V8-NEXT: movt r1, :upper16:t +; V8-NEXT: str r0, [r1] +; V8-NEXT: mov r0, r2 +; V8-NEXT: mov r1, r3 +; V8-NEXT: bx lr +entry: %cmp = icmp sgt i32 %a, %b %sub = sub i32 %b, %a %ret = select i1 %cmp, double %x, double %y @@ -164,11 +212,22 @@ ; If the comparison uses the V bit (signed overflow/underflow), we can't ; omit the comparison. define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) { +; V8-LABEL: cmp_slt0: +; V8: @ %bb.0: @ %entry +; V8-NEXT: .save {r11, lr} +; V8-NEXT: push {r11, lr} +; V8-NEXT: movw r0, :lower16:t +; V8-NEXT: movt r0, :upper16:t +; V8-NEXT: ldr r0, [r0] +; V8-NEXT: sub r0, r0, #17 +; V8-NEXT: cmn r0, #1 +; V8-NEXT: ble .LBB11_2 +; V8-NEXT: @ %bb.1: @ %if.else +; V8-NEXT: mov r0, #0 +; V8-NEXT: bl exit +; V8-NEXT: .LBB11_2: @ %if.then +; V8-NEXT: bl abort entry: -; CHECK-LABEL: cmp_slt0 -; CHECK: sub -; CHECK: cmn -; CHECK: ble %load = load i32, ptr @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp slt i32 %sub, 0 @@ -186,11 +245,19 @@ ; Same for the C bit. (Note the ult X, 0 is trivially ; false, so the DAG combiner may or may not optimize it). 
define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) { +; V8-LABEL: cmp_ult0: +; V8: @ %bb.0: @ %entry +; V8-NEXT: .save {r11, lr} +; V8-NEXT: push {r11, lr} +; V8-NEXT: mov r0, #1 +; V8-NEXT: cmp r0, #0 +; V8-NEXT: bne .LBB12_2 +; V8-NEXT: @ %bb.1: @ %if.then +; V8-NEXT: bl abort +; V8-NEXT: .LBB12_2: @ %if.else +; V8-NEXT: mov r0, #0 +; V8-NEXT: bl exit entry: -; CHECK-LABEL: cmp_ult0 -; CHECK: sub -; CHECK: cmp -; CHECK: bhs %load = load i32, ptr @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp ult i32 %sub, 0 @@ -204,3 +271,6 @@ call void @exit(i32 0) unreachable } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} +; V7: {{.*}} diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll --- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll @@ -10,35 +10,10 @@ ; CHECK-LABEL: fred: ; CHECK: // %bb.0: // %b0 ; CHECK-NEXT: { -; CHECK-NEXT: if (p0) jump:nt .LBB0_2 -; CHECK-NEXT: } -; CHECK-NEXT: // %bb.1: // %b2 -; CHECK-NEXT: { -; CHECK-NEXT: r3:2 = combine(#0,#0) -; CHECK-NEXT: r1:0 = memd(r0+#0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r1:0 = mask(p0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = cmp.eq(r0,#11) -; CHECK-NEXT: r0 = #1 -; CHECK-NEXT: } -; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) r0 = #1 ; CHECK-NEXT: if (p0) r0 = #0 ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } -; CHECK-NEXT: .LBB0_2: // %b14 -; CHECK-NEXT: { -; CHECK-NEXT: r0 = #0 -; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: } b0: switch i32 undef, label %b14 [ i32 5, label %b2 diff --git a/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll b/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll --- a/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll @@ -110,12 +110,9 @@ ; 
CHECK: ! %bb.0: ! %entry ; CHECK-NEXT: st %fp, [--%sp] ; CHECK-NEXT: add %sp, 0x8, %fp -; CHECK-NEXT: mov hi(t), %r3 -; CHECK-NEXT: or %r3, lo(t), %r3 -; CHECK-NEXT: ld 0[%r3], %r3 -; CHECK-NEXT: sub %r3, 0x11, %r3 +; CHECK-NEXT: mov 0x1, %r3 ; CHECK-NEXT: sub.f %r3, 0x0, %r0 -; CHECK-NEXT: buge .LBB5_2 +; CHECK-NEXT: bne .LBB5_2 ; CHECK-NEXT: sub %sp, 0x10, %sp ; CHECK-NEXT: .LBB5_1: ! %if.then ; CHECK-NEXT: add %pc, 0x10, %rca diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1239,14 +1239,9 @@ ; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: seqz a4, a4 -; RV32IF-NEXT: xori a0, a0, 1 -; RV32IF-NEXT: or a0, a0, a1 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: and a0, a0, a4 -; RV32IF-NEXT: neg a1, a0 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: snez a0, a0 +; RV32IF-NEXT: addi a1, a0, -1 ; RV32IF-NEXT: and a0, a1, a3 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1279,14 +1274,9 @@ ; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: seqz a4, a4 -; RV32IFD-NEXT: xori a0, a0, 1 -; RV32IFD-NEXT: or a0, a0, a1 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi a0, a0, -1 -; RV32IFD-NEXT: and a0, a0, a4 -; RV32IFD-NEXT: neg a1, a0 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: snez a0, a0 +; RV32IFD-NEXT: addi a1, a0, -1 ; RV32IFD-NEXT: and a0, a1, a3 ; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1321,11 +1311,6 @@ ; RV32IF-NEXT: seqz a2, a0 ; RV32IF-NEXT: .LBB20_3: # %entry ; RV32IF-NEXT: lw a3, 12(sp) -; RV32IF-NEXT: xori a4, a0, 1 -; RV32IF-NEXT: or a4, a4, a1 -; RV32IF-NEXT: seqz a4, a4 -; RV32IF-NEXT: addi a4, a4, -1 -; RV32IF-NEXT: and a2, a4, a2 ; 
RV32IF-NEXT: neg a4, a2 ; RV32IF-NEXT: bnez a2, .LBB20_5 ; RV32IF-NEXT: # %bb.4: # %entry @@ -1407,11 +1392,6 @@ ; RV32IFD-NEXT: seqz a2, a0 ; RV32IFD-NEXT: .LBB20_3: # %entry ; RV32IFD-NEXT: lw a3, 12(sp) -; RV32IFD-NEXT: xori a4, a0, 1 -; RV32IFD-NEXT: or a4, a4, a1 -; RV32IFD-NEXT: seqz a4, a4 -; RV32IFD-NEXT: addi a4, a4, -1 -; RV32IFD-NEXT: and a2, a4, a2 ; RV32IFD-NEXT: neg a4, a2 ; RV32IFD-NEXT: bnez a2, .LBB20_5 ; RV32IFD-NEXT: # %bb.4: # %entry @@ -1549,14 +1529,9 @@ ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: addi a1, a0, -1 ; RV32-NEXT: and a0, a1, a3 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1603,11 +1578,6 @@ ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB23_3: # %entry ; RV32-NEXT: lw a3, 12(sp) -; RV32-NEXT: xori a4, a0, 1 -; RV32-NEXT: or a4, a4, a1 -; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a2, a4, a2 ; RV32-NEXT: neg a4, a2 ; RV32-NEXT: bnez a2, .LBB23_5 ; RV32-NEXT: # %bb.4: # %entry @@ -1808,14 +1778,9 @@ ; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: addi a1, a0, -1 ; RV32-NEXT: and a0, a1, a3 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1866,11 +1831,6 @@ ; RV32-NEXT: seqz a2, a0 ; RV32-NEXT: .LBB26_3: # %entry ; RV32-NEXT: lw a3, 12(sp) -; RV32-NEXT: xori a4, a0, 1 -; RV32-NEXT: or a4, a4, a1 -; RV32-NEXT: 
seqz a4, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a2, a4, a2 ; RV32-NEXT: neg a4, a2 ; RV32-NEXT: bnez a2, .LBB26_5 ; RV32-NEXT: # %bb.4: # %entry diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -409,14 +409,17 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: sraw a0, a0, a1 +; CHECK-NEXT: li s0, 1 ; CHECK-NEXT: .LBB7_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call foo@plt ; CHECK-NEXT: ori a0, a0, -256 -; CHECK-NEXT: bnez a0, .LBB7_1 +; CHECK-NEXT: bnez s0, .LBB7_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; @@ -424,15 +427,18 @@ ; NOREMOVAL: # %bb.0: # %bb ; NOREMOVAL-NEXT: addi sp, sp, -16 ; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; NOREMOVAL-NEXT: sraw a0, a0, a1 +; NOREMOVAL-NEXT: li s0, 1 ; NOREMOVAL-NEXT: .LBB7_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call foo@plt ; NOREMOVAL-NEXT: ori a0, a0, -256 -; NOREMOVAL-NEXT: bnez a0, .LBB7_1 +; NOREMOVAL-NEXT: bnez s0, .LBB7_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; NOREMOVAL-NEXT: addi sp, sp, 16 ; NOREMOVAL-NEXT: ret bb: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-46.ll b/llvm/test/CodeGen/SystemZ/int-cmp-46.ll --- a/llvm/test/CodeGen/SystemZ/int-cmp-46.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-46.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test the use of 
TEST UNDER MASK for 32-bit operations. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s @@ -7,9 +8,13 @@ ; Check the lowest useful TMLL value. define void @f1(i32 %a) { ; CHECK-LABEL: f1: -; CHECK: tmll %r2, 1 -; CHECK: ber %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 1 +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB0_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 1 %cmp = icmp eq i32 %and, 0 @@ -26,9 +31,13 @@ ; Check the high end of the TMLL range. define void @f2(i32 %a) { ; CHECK-LABEL: f2: -; CHECK: tmll %r2, 65535 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 65535 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB1_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 65535 %cmp = icmp ne i32 %and, 0 @@ -45,9 +54,13 @@ ; Check the lowest useful TMLH value, which is the next value up. define void @f3(i32 %a) { ; CHECK-LABEL: f3: -; CHECK: tmlh %r2, 1 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmlh %r2, 1 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB2_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 65536 %cmp = icmp ne i32 %and, 0 @@ -64,8 +77,13 @@ ; Check the next value up again, which cannot use TM. define void @f4(i32 %a) { ; CHECK-LABEL: f4: -; CHECK-NOT: {{tm[lh].}} -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nilh %r2, 65534 +; CHECK-NEXT: cibe %r2, 0, 0(%r14) +; CHECK-NEXT: .LBB3_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 4294901759 %cmp = icmp eq i32 %and, 0 @@ -82,9 +100,13 @@ ; Check the high end of the TMLH range. 
define void @f5(i32 %a) { ; CHECK-LABEL: f5: -; CHECK: tmlh %r2, 65535 -; CHECK: ber %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmlh %r2, 65535 +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB4_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 4294901760 %cmp = icmp eq i32 %and, 0 @@ -102,9 +124,13 @@ ; an equality comparison with zero. define void @f6(i32 %a) { ; CHECK-LABEL: f6: -; CHECK: tmll %r2, 240 -; CHECK: ber %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 240 +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB5_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 240 %cmp = icmp slt i32 %and, 16 @@ -121,9 +147,13 @@ ; ...same again with LE. define void @f7(i32 %a) { ; CHECK-LABEL: f7: -; CHECK: tmll %r2, 240 -; CHECK: ber %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 240 +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB6_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 240 %cmp = icmp sle i32 %and, 15 @@ -141,9 +171,13 @@ ; an inequality comparison with zero. define void @f8(i32 %a) { ; CHECK-LABEL: f8: -; CHECK: tmll %r2, 240 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 240 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB7_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 240 %cmp = icmp uge i32 %and, 16 @@ -160,9 +194,13 @@ ; ...same again with GT. 
define void @f9(i32 %a) { ; CHECK-LABEL: f9: -; CHECK: tmll %r2, 240 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 240 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB8_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 240 %cmp = icmp ugt i32 %and, 15 @@ -180,9 +218,13 @@ ; test whether the top bit is clear. define void @f10(i32 %a) { ; CHECK-LABEL: f10: -; CHECK: tmll %r2, 35 -; CHECK: bler %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 35 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB9_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 35 %cmp = icmp ult i32 %and, 8 @@ -199,9 +241,13 @@ ; ...same again with LE. define void @f11(i32 %a) { ; CHECK-LABEL: f11: -; CHECK: tmll %r2, 35 -; CHECK: bler %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 35 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB10_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 35 %cmp = icmp ule i32 %and, 31 @@ -219,9 +265,13 @@ ; whether the top bit is set. define void @f12(i32 %a) { ; CHECK-LABEL: f12: -; CHECK: tmll %r2, 140 -; CHECK: bnler %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 140 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB11_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 140 %cmp = icmp uge i32 %and, 128 @@ -238,9 +288,13 @@ ; ...same again for GT. 
define void @f13(i32 %a) { ; CHECK-LABEL: f13: -; CHECK: tmll %r2, 140 -; CHECK: bnler %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 140 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB12_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 140 %cmp = icmp ugt i32 %and, 126 @@ -257,9 +311,13 @@ ; Check that we can use TMLL for equality comparisons with the mask. define void @f14(i32 %a) { ; CHECK-LABEL: f14: -; CHECK: tmll %r2, 101 -; CHECK: bor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 101 +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB13_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 101 %cmp = icmp eq i32 %and, 101 @@ -276,9 +334,13 @@ ; Check that we can use TMLL for inequality comparisons with the mask. define void @f15(i32 %a) { ; CHECK-LABEL: f15: -; CHECK: tmll %r2, 65519 -; CHECK: bnor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 65519 +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB14_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 65519 %cmp = icmp ne i32 %and, 65519 @@ -296,9 +358,13 @@ ; to inequality comparisons with the mask. define void @f16(i32 %a) { ; CHECK-LABEL: f16: -; CHECK: tmll %r2, 130 -; CHECK: bnor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 130 +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB15_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 130 %cmp = icmp ult i32 %and, 129 @@ -315,9 +381,13 @@ ; ...same again with LE. 
define void @f17(i32 %a) { ; CHECK-LABEL: f17: -; CHECK: tmll %r2, 130 -; CHECK: bnor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 130 +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB16_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 130 %cmp = icmp ule i32 %and, 128 @@ -335,9 +405,13 @@ ; to equality comparisons with the mask. define void @f18(i32 %a) { ; CHECK-LABEL: f18: -; CHECK: tmll %r2, 194 -; CHECK: bor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 194 +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB17_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 194 %cmp = icmp uge i32 %and, 193 @@ -354,9 +428,13 @@ ; ...same again for GT. define void @f19(i32 %a) { ; CHECK-LABEL: f19: -; CHECK: tmll %r2, 194 -; CHECK: bor %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 194 +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB18_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 194 %cmp = icmp ugt i32 %and, 192 @@ -374,9 +452,13 @@ ; when the mask has two bits. define void @f20(i32 %a) { ; CHECK-LABEL: f20: -; CHECK: tmll %r2, 20 -; CHECK: blr %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 20 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB19_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 20 %cmp = icmp eq i32 %and, 4 @@ -394,9 +476,13 @@ ; when the mask has two bits. 
define void @f21(i32 %a) { ; CHECK-LABEL: f21: -; CHECK: tmll %r2, 20 -; CHECK: bnlr %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 20 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB20_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 20 %cmp = icmp ne i32 %and, 4 @@ -414,9 +500,13 @@ ; when the mask has two bits. define void @f22(i32 %a) { ; CHECK-LABEL: f22: -; CHECK: tmll %r2, 20 -; CHECK: bhr %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 20 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB21_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 20 %cmp = icmp eq i32 %and, 16 @@ -434,9 +524,13 @@ ; when the mask has two bits. define void @f23(i32 %a) { ; CHECK-LABEL: f23: -; CHECK: tmll %r2, 20 -; CHECK: bnhr %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 20 +; CHECK-NEXT: bnhr %r14 +; CHECK-NEXT: .LBB22_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %and = and i32 %a, 20 %cmp = icmp ne i32 %and, 16 @@ -453,9 +547,13 @@ ; Check that we can fold an SHL into a TMxx mask. define void @f24(i32 %a) { ; CHECK-LABEL: f24: -; CHECK: tmll %r2, 255 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmll %r2, 255 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB23_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %shl = shl i32 %a, 12 %and = and i32 %shl, 1044480 @@ -473,9 +571,13 @@ ; Check that we can fold an SHR into a TMxx mask. 
define void @f25(i32 %a) { ; CHECK-LABEL: f25: -; CHECK: tmlh %r2, 512 -; CHECK: bner %r14 -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tmlh %r2, 512 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB24_1: # %store +; CHECK-NEXT: lgrl %r1, g@GOT +; CHECK-NEXT: mvhi 0(%r1), 1 +; CHECK-NEXT: br %r14 entry: %shr = lshr i32 %a, 25 %and = and i32 %shr, 1 diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-47.ll b/llvm/test/CodeGen/SystemZ/int-cmp-47.ll --- a/llvm/test/CodeGen/SystemZ/int-cmp-47.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-47.ll @@ -424,8 +424,8 @@ define void @f19(i64 %a) { ; CHECK-LABEL: f19: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: srlg %r0, %r2, 63 -; CHECK-NEXT: cgibl %r0, 3, 0(%r14) +; CHECK-NEXT: lhi %r0, 1 +; CHECK-NEXT: ciblh %r0, 0, 0(%r14) ; CHECK-NEXT: .LBB18_1: # %store ; CHECK-NEXT: lgrl %r1, g@GOT ; CHECK-NEXT: mvhi 0(%r1), 1 diff --git a/llvm/test/CodeGen/Thumb/cmp-and-fold.ll b/llvm/test/CodeGen/Thumb/cmp-and-fold.ll --- a/llvm/test/CodeGen/Thumb/cmp-and-fold.ll +++ b/llvm/test/CodeGen/Thumb/cmp-and-fold.ll @@ -129,10 +129,9 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: movs r2, #32 -; CHECK-NEXT: ands r2, r0 -; CHECK-NEXT: cmp r2, #17 -; CHECK-NEXT: beq .LBB5_2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB5_2 ; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: blx r1 ; CHECK-NEXT: .LBB5_2: @ %if.end diff --git a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll --- a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll +++ b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll @@ -4,25 +4,18 @@ define signext i16 @f(ptr %bp, ptr %ss) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset %esi, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb $1, %cl ; 
CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %cond_next127 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl (%eax), %edx -; CHECK-NEXT: movl (%ecx), %esi ; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: andl $15, %esi -; CHECK-NEXT: addl %esi, (%ecx) -; CHECK-NEXT: cmpl $63, %edx -; CHECK-NEXT: jb .LBB0_1 +; CHECK-NEXT: addl %edx, (%eax) +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: popl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl entry: br label %cond_next127 diff --git a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll --- a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll +++ b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s | FileCheck %s ; Check that an overly large immediate created by SROA doesn't crash the ; legalizer. 
@@ -11,6 +12,38 @@ @a = common global ptr null, align 8 define void @fn1() nounwind uwtable ssp { +; CHECK-LABEL: fn1: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq _a@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: movq 64(%rax), %rcx +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: shrq $32, %rdx +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: movq 56(%rax), %rdx +; CHECK-NEXT: movq 48(%rax), %rsi +; CHECK-NEXT: movq 40(%rax), %rdi +; CHECK-NEXT: movq 32(%rax), %r8 +; CHECK-NEXT: movq 24(%rax), %r9 +; CHECK-NEXT: movq 16(%rax), %r10 +; CHECK-NEXT: movq (%rax), %r11 +; CHECK-NEXT: movq 8(%rax), %rbx +; CHECK-NEXT: movq %r11, (%rax) +; CHECK-NEXT: movq %rbx, 8(%rax) +; CHECK-NEXT: movq %r10, 16(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) +; CHECK-NEXT: movq %r8, 32(%rax) +; CHECK-NEXT: movq %rdi, 40(%rax) +; CHECK-NEXT: movq %rsi, 48(%rax) +; CHECK-NEXT: movq %rdx, 56(%rax) +; CHECK-NEXT: movq %rcx, 64(%rax) +; CHECK-NEXT: LBB0_2: ## %if.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq entry: %0 = load ptr, ptr @a, align 8 %srcval2 = load i576, ptr %0, align 8 @@ -24,6 +57,4 @@ if.end: ; preds = %if.then, %entry ret void -; CHECK-LABEL: fn1: -; CHECK: jb } diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -609,49 +609,16 @@ } define void @test7(<8 x i1> %mask) { -; KNL-LABEL: test7: -; KNL: ## %bb.0: ## %allocas -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: orb $85, %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: test7: -; SKX: ## %bb.0: ## %allocas -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 -; SKX-NEXT: vpmovw2m %xmm0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: 
orb $85, %al -; SKX-NEXT: retq -; -; AVX512BW-LABEL: test7: -; AVX512BW: ## %bb.0: ## %allocas -; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: orb $85, %al -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: test7: -; AVX512DQ: ## %bb.0: ## %allocas -; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: orb $85, %al -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq +; CHECK-LABEL: test7: +; CHECK: ## %bb.0: ## %allocas +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: retq ; ; X86-LABEL: test7: ; X86: ## %bb.0: ## %allocas -; X86-NEXT: vpsllw $15, %xmm0, %xmm0 -; X86-NEXT: vpmovw2m %xmm0, %k0 -; X86-NEXT: kmovd %k0, %eax -; X86-NEXT: orb $85, %al +; X86-NEXT: movb $1, %al +; X86-NEXT: testb %al, %al ; X86-NEXT: retl allocas: %a= or <8 x i1> %mask, diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -280,9 +280,7 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] +; CHECK-NEXT: movl %edx, %eax # encoding: [0x89,0xd0] ; CHECK-NEXT: retq # encoding: [0xc3] %s = lshr i32 %mask, 7 %tobool = icmp sgt i32 %s, -1 diff --git a/llvm/test/CodeGen/X86/fold-rmw-ops.ll b/llvm/test/CodeGen/X86/fold-rmw-ops.ll --- a/llvm/test/CodeGen/X86/fold-rmw-ops.ll +++ b/llvm/test/CodeGen/X86/fold-rmw-ops.ll @@ -1352,9 +1352,11 @@ define void @or64_imm32_br() nounwind { ; CHECK-LABEL: or64_imm32_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orq $16777215, g64(%rip) # encoding: [0x48,0x81,0x0d,A,A,A,A,0xff,0xff,0xff,0x00] -; 
CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: orl $16777215, g64(%rip) # encoding: [0x81,0x0d,A,A,A,A,0xff,0xff,0xff,0x00] +; CHECK-NEXT: # fixup A - offset: 2, value: g64-8, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0xFFFFFF +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1385,6 +1387,8 @@ ; CHECK-NEXT: orq $-2147483648, g64(%rip) # encoding: [0x48,0x81,0x0d,A,A,A,A,0x00,0x00,0x00,0x80] ; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1412,8 +1416,10 @@ define void @or64_imm8_br() nounwind { ; CHECK-LABEL: or64_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orq $15, g64(%rip) # encoding: [0x48,0x83,0x0d,A,A,A,A,0x0f] -; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: orb $15, g64(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: # fixup A - offset: 2, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1442,6 +1448,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq $-4, g64(%rip) # encoding: [0x48,0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: 
[0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1468,9 +1476,10 @@ define void @or32_imm_br() nounwind { ; CHECK-LABEL: or32_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orl $-2147483648, g32(%rip) # encoding: [0x81,0x0d,A,A,A,A,0x00,0x00,0x00,0x80] -; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte -; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: orb $-128, g32+3(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: (g32+3)-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1498,8 +1507,10 @@ define void @or32_imm8_br() nounwind { ; CHECK-LABEL: or32_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orl $15, g32(%rip) # encoding: [0x83,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: orb $15, g32(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] ; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1528,6 +1539,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl $-4, g32(%rip) # encoding: [0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1554,9 +1567,10 @@ define void @or16_imm_br() nounwind { ; CHECK-LABEL: or16_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orw $-32768, g16(%rip) # encoding: [0x66,0x81,0x0d,A,A,A,A,0x00,0x80] 
-; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte -; CHECK-NEXT: # imm = 0x8000 +; CHECK-NEXT: orb $-128, g16+1(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: (g16+1)-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1583,8 +1597,10 @@ define void @or16_imm8_br() nounwind { ; CHECK-LABEL: or16_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orw $15, g16(%rip) # encoding: [0x66,0x83,0x0d,A,A,A,A,0x0f] -; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: orb $15, g16(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: # fixup A - offset: 2, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1613,6 +1629,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orw $-4, g16(%rip) # encoding: [0x66,0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1641,6 +1659,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb $-4, g8(%rip) # encoding: [0x80,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 2, value: g8-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, 
value: b-1, kind: FK_PCRel_1 diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -792,23 +792,12 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movb $-128, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: andb {{[0-9]+}}(%esp), %al -; X86-NEXT: cmpb $1, %al -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: andb %dil, %al -; X64-NEXT: cmpb $1, %al -; X64-NEXT: sete %al +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %t0 = shl i8 128, %y %t1 = and i8 %t0, %x diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll --- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll +++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll @@ -333,18 +333,15 @@ define i1 @is_normal_f80(x86_fp80 %x) { ; CHECK-32-LABEL: is_normal_f80: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: andl $32767, %ecx # imm = 0x7FFF -; CHECK-32-NEXT: decl %ecx -; CHECK-32-NEXT: movzwl %cx, %ecx -; CHECK-32-NEXT: xorl %edx, %edx -; CHECK-32-NEXT: cmpl $32766, %ecx # imm = 0x7FFE -; CHECK-32-NEXT: sbbl %edx, %edx -; CHECK-32-NEXT: setb %cl -; CHECK-32-NEXT: shrl $31, %eax +; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: shrl $31, %ecx +; CHECK-32-NEXT: andl $32767, %eax # imm = 0x7FFF +; CHECK-32-NEXT: decl %eax +; 
CHECK-32-NEXT: movzwl %ax, %eax +; CHECK-32-NEXT: cmpl $32766, %eax # imm = 0x7FFE +; CHECK-32-NEXT: setb %al ; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: # kill: def $al killed $al killed $eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: is_normal_f80: @@ -367,28 +364,20 @@ define i1 @is_posnormal_f80(x86_fp80 %x) { ; CHECK-32-LABEL: is_posnormal_f80: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: pushl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 8 -; CHECK-32-NEXT: .cfi_offset %esi, -8 -; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; CHECK-32-NEXT: movswl %dx, %ecx -; CHECK-32-NEXT: sarl $15, %ecx +; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: movswl %cx, %eax +; CHECK-32-NEXT: sarl $15, %eax ; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF -; CHECK-32-NEXT: decl %edx -; CHECK-32-NEXT: movzwl %dx, %edx -; CHECK-32-NEXT: xorl %esi, %esi -; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE -; CHECK-32-NEXT: sbbl %esi, %esi -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: testl %ecx, %ecx -; CHECK-32-NEXT: setns %cl +; CHECK-32-NEXT: setns %dl +; CHECK-32-NEXT: andl $32767, %ecx # imm = 0x7FFF +; CHECK-32-NEXT: decl %ecx +; CHECK-32-NEXT: movzwl %cx, %ecx +; CHECK-32-NEXT: cmpl $32766, %ecx # imm = 0x7FFE +; CHECK-32-NEXT: setb %cl ; CHECK-32-NEXT: shrl $31, %eax -; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: andb %dl, %al +; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: # kill: def $al killed $al killed $eax -; CHECK-32-NEXT: popl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 4 ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: is_posnormal_f80: @@ -415,28 +404,20 @@ define i1 @is_negnormal_f80(x86_fp80 %x) { ; CHECK-32-LABEL: is_negnormal_f80: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: pushl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 8 -; CHECK-32-NEXT: .cfi_offset %esi, -8 -; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; CHECK-32-NEXT: movswl %dx, %ecx -; CHECK-32-NEXT: sarl $15, %ecx +; CHECK-32-NEXT: 
movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-32-NEXT: movswl %cx, %eax +; CHECK-32-NEXT: sarl $15, %eax ; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF -; CHECK-32-NEXT: decl %edx -; CHECK-32-NEXT: movzwl %dx, %edx -; CHECK-32-NEXT: xorl %esi, %esi -; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE -; CHECK-32-NEXT: sbbl %esi, %esi -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: testl %ecx, %ecx -; CHECK-32-NEXT: sets %cl +; CHECK-32-NEXT: sets %dl +; CHECK-32-NEXT: andl $32767, %ecx # imm = 0x7FFF +; CHECK-32-NEXT: decl %ecx +; CHECK-32-NEXT: movzwl %cx, %ecx +; CHECK-32-NEXT: cmpl $32766, %ecx # imm = 0x7FFE +; CHECK-32-NEXT: setb %cl ; CHECK-32-NEXT: shrl $31, %eax -; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: andb %dl, %al +; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: # kill: def $al killed $al killed $eax -; CHECK-32-NEXT: popl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 4 ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: is_negnormal_f80: diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -64,7 +64,7 @@ ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB -; CHECK-NEXT: cmpl $1431655766, %eax # imm = 0x55555556 +; CHECK-NEXT: cmpl $1431655765, %eax # imm = 0x55555555 ; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %t0 = and i32 %x, 2 ; clearly a power-of-two or zero diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll --- a/llvm/test/CodeGen/X86/or-with-overflow.ll +++ b/llvm/test/CodeGen/X86/or-with-overflow.ll @@ -10,20 +10,13 @@ ; X86-LABEL: or_i8_ri: ; X86: # %bb.0: ; X86-NEXT: 
movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orb $-17, %cl -; X86-NEXT: je .LBB0_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB0_2: +; X86-NEXT: orb $-17, %al ; X86-NEXT: retl ; ; X64-LABEL: or_i8_ri: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: orb $-17, %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: cmovel %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %3 = or i8 %0, -17 @@ -60,14 +53,8 @@ define i16 @or_i16_ri(i16 zeroext %0, i16 zeroext %1) { ; X86-LABEL: or_i16_ri: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $65519, %ecx # imm = 0xFFEF -; X86-NEXT: testw %cx, %cx -; X86-NEXT: je .LBB2_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB2_2: +; X86-NEXT: movl $65519, %eax # imm = 0xFFEF +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -75,7 +62,6 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: orl $65519, %eax # imm = 0xFFEF -; X64-NEXT: cmovel %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %3 = or i16 %0, -17 @@ -114,19 +100,11 @@ ; X86-LABEL: or_i32_ri: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $-17, %ecx -; X86-NEXT: jle .LBB4_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB4_2: ; X86-NEXT: retl ; ; X64-LABEL: or_i32_ri: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: orl $-17, %eax -; X64-NEXT: cmovlel %edi, %eax ; X64-NEXT: retq %3 = or i32 %0, -17 %4 = icmp slt i32 %3, 1 @@ -161,29 +139,13 @@ define i64 @or_i64_ri(i64 %0, i64 %1) nounwind { ; X86-LABEL: or_i64_ri: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $-17, %ecx -; X86-NEXT: cmpl $1, %ecx -; X86-NEXT: movl $-1, %edx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: 
sbbl $0, %esi -; X86-NEXT: jl .LBB6_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB6_1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: or_i64_ri: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: orq $-17, %rax -; X64-NEXT: cmovleq %rdi, %rax ; X64-NEXT: retq %3 = or i64 %0, -17 %4 = icmp slt i64 %3, 1 diff --git a/llvm/test/CodeGen/X86/pr16031.ll b/llvm/test/CodeGen/X86/pr16031.ll --- a/llvm/test/CodeGen/X86/pr16031.ll +++ b/llvm/test/CodeGen/X86/pr16031.ll @@ -4,16 +4,7 @@ define i64 @main(i1 %tobool1) nounwind { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: decl %eax -; CHECK-NEXT: orl $-12, %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: addl $-1, %edx -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: adcl $-2, %edx -; CHECK-NEXT: cmovsl %ecx, %eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/pr28444.ll b/llvm/test/CodeGen/X86/pr28444.ll --- a/llvm/test/CodeGen/X86/pr28444.ll +++ b/llvm/test/CodeGen/X86/pr28444.ll @@ -11,8 +11,10 @@ define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) { ; CHECK-LABEL: extractelt_mismatch_vector_element_type: ; CHECK: # %bb.0: # %bb +; CHECK-NEXT: movb $-1, %cl ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: movb %al, (%rax) +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movb %cl, (%rax) ; CHECK-NEXT: movb %al, (%rax) ; CHECK-NEXT: retq bb: diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -60,7 +60,7 @@ ; GENERIC-NEXT: testb $1, %al ; GENERIC-NEXT: movl $-3840, %eax ## imm = 0xF100 ; GENERIC-NEXT: cmovnel %ecx, %eax -; GENERIC-NEXT: cmpl $32768, %eax ## imm = 0x8000 +; GENERIC-NEXT: cmpl $32767, %eax ## 
imm = 0x7FFF ; GENERIC-NEXT: jge LBB1_1 ; GENERIC-NEXT: ## %bb.2: ## %bb91 ; GENERIC-NEXT: xorl %eax, %eax @@ -77,7 +77,7 @@ ; ATOM-NEXT: movl $-3840, %edx ## imm = 0xF100 ; ATOM-NEXT: testb $1, %al ; ATOM-NEXT: cmovnel %ecx, %edx -; ATOM-NEXT: cmpl $32768, %edx ## imm = 0x8000 +; ATOM-NEXT: cmpl $32767, %edx ## imm = 0x7FFF ; ATOM-NEXT: jge LBB1_1 ; ATOM-NEXT: ## %bb.2: ## %bb91 ; ATOM-NEXT: xorl %eax, %eax @@ -94,7 +94,7 @@ ; ATHLON-NEXT: testb $1, %al ; ATHLON-NEXT: movl $-3840, %eax ## imm = 0xF100 ; ATHLON-NEXT: cmovnel %ecx, %eax -; ATHLON-NEXT: cmpl $32768, %eax ## imm = 0x8000 +; ATHLON-NEXT: cmpl $32767, %eax ## imm = 0x7FFF ; ATHLON-NEXT: jge LBB1_1 ; ATHLON-NEXT: ## %bb.2: ## %bb91 ; ATHLON-NEXT: xorl %eax, %eax @@ -112,7 +112,7 @@ ; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: movl $-3840, %ecx # imm = 0xF100 ; MCU-NEXT: .LBB1_2: # %entry -; MCU-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; MCU-NEXT: cmpl $32767, %ecx # imm = 0x7FFF ; MCU-NEXT: jge .LBB1_3 ; MCU-NEXT: # %bb.4: # %bb91 ; MCU-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll --- a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll +++ b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll @@ -124,8 +124,8 @@ define dso_local void @test2_1(i32 %X) nounwind !prof !14 { ; CHECK-LABEL: test2_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: cmpl $256, %eax # imm = 0x100 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll --- a/llvm/test/CodeGen/X86/shrink-compare.ll +++ b/llvm/test/CodeGen/X86/shrink-compare.ll @@ -124,8 +124,8 @@ define dso_local void @test2_1(i32 %X) nounwind minsize { ; CHECK-LABEL: test2_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: cmpl $256, %eax # imm = 0x100 +; CHECK-NEXT: 
movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll --- a/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll @@ -12,15 +12,7 @@ define i64 @func() nounwind { ; X64-LABEL: func: ; X64: # %bb.0: -; X64-NEXT: movl $2, %eax -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movl $1, %ecx -; X64-NEXT: cmovgeq %rax, %rcx -; X64-NEXT: movq $-2, %rax -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-NEXT: cmovgeq %rcx, %rax +; X64-NEXT: movl $1, %eax ; X64-NEXT: retq %tmp = call i64 @llvm.smul.fix.sat.i64(i64 3, i64 2, i32 2) ret i64 %tmp @@ -41,15 +33,7 @@ define i64 @func3() nounwind { ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: movl $2, %eax -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movabsq $4611686018427387903, %rcx # imm = 0x3FFFFFFFFFFFFFFF -; X64-NEXT: cmovgeq %rax, %rcx -; X64-NEXT: movq $-2, %rax -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-NEXT: cmovgeq %rcx, %rax +; X64-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF ; X64-NEXT: retq %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 2) ret i64 %tmp @@ -58,15 +42,7 @@ define i64 @func4() nounwind { ; X64-LABEL: func4: ; X64: # %bb.0: -; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmovgq %rax, %rcx -; X64-NEXT: movq $-2147483648, %rax # imm = 0x80000000 -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $-9223372036854775808, 
%rax # imm = 0x8000000000000000 -; X64-NEXT: cmovgeq %rcx, %rax +; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF ; X64-NEXT: retq %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 32) ret i64 %tmp @@ -75,15 +51,7 @@ define i64 @func5() nounwind { ; X64-LABEL: func5: ; X64: # %bb.0: -; X64-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movl $1, %ecx -; X64-NEXT: cmovgq %rax, %rcx -; X64-NEXT: movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000 -; X64-NEXT: negq %rax -; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-NEXT: cmovgeq %rcx, %rax +; X64-NEXT: movl $1, %eax ; X64-NEXT: retq %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 63) ret i64 %tmp diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll --- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll +++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll @@ -105,40 +105,27 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: pextrw $2, %xmm0, %eax ; CHECK-NEXT: leal (%rax,%rax,2), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $16, %ecx +; CHECK-NEXT: shldw $1, %ax, %cx +; CHECK-NEXT: pextrw $1, %xmm0, %eax +; CHECK-NEXT: addl %eax, %eax ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldw $1, %ax, %cx -; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000 -; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF -; CHECK-NEXT: cmovael %eax, %ecx -; CHECK-NEXT: pextrw $1, %xmm0, %edx -; CHECK-NEXT: addl %edx, %edx -; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: shrl $16, %esi -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: shldw $1, %dx, %di -; CHECK-NEXT: cmpl $32768, %esi # imm = 0x8000 -; CHECK-NEXT: cmovael %eax, %edi -; CHECK-NEXT: movd %xmm0, %edx +; CHECK-NEXT: shldw $1, %ax, %dx +; CHECK-NEXT: 
movd %xmm0, %eax ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: shldw $1, %dx, %si -; CHECK-NEXT: movl $32768, %edx # imm = 0x8000 -; CHECK-NEXT: negl %edx -; CHECK-NEXT: cmovael %eax, %esi -; CHECK-NEXT: movzwl %si, %edx -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: pinsrw $1, %edi, %xmm1 +; CHECK-NEXT: shldw $1, %ax, %si +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: pinsrw $1, %edx, %xmm1 ; CHECK-NEXT: pinsrw $2, %ecx, %xmm1 -; CHECK-NEXT: pextrw $3, %xmm0, %ecx -; CHECK-NEXT: shll $2, %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: shldw $1, %cx, %si -; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000 -; CHECK-NEXT: cmovael %eax, %esi -; CHECK-NEXT: pinsrw $3, %esi, %xmm1 +; CHECK-NEXT: pextrw $3, %xmm0, %eax +; CHECK-NEXT: shll $2, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $16, %ecx +; CHECK-NEXT: shldw $1, %ax, %cx +; CHECK-NEXT: pinsrw $3, %ecx, %xmm1 ; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %t = call <4 x i16> @llvm.umul.fix.sat.v4i16(<4 x i16> , <4 x i16> %a, i32 15)