diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4240,6 +4240,105 @@ !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + // Try to constant fold SetCC. + if (OpVT.isInteger()) { + KnownBits KnownRHS = DAG.computeKnownBits(N1); + if (!KnownRHS.isUnknown()) { + KnownBits KnownLHS = DAG.computeKnownBits(N0); + std::optional<bool> Res; + // Check if we can constant fold this with knownbits. + switch (Cond) { + case ISD::SETEQ: + Res = KnownBits::eq(KnownLHS, KnownRHS); + break; + case ISD::SETNE: + Res = KnownBits::ne(KnownLHS, KnownRHS); + break; + case ISD::SETLT: + Res = KnownBits::slt(KnownLHS, KnownRHS); + break; + case ISD::SETULT: + Res = KnownBits::ult(KnownLHS, KnownRHS); + break; + case ISD::SETGT: + Res = KnownBits::sgt(KnownLHS, KnownRHS); + break; + case ISD::SETUGT: + Res = KnownBits::ugt(KnownLHS, KnownRHS); + break; + case ISD::SETLE: + Res = KnownBits::sle(KnownLHS, KnownRHS); + break; + case ISD::SETULE: + Res = KnownBits::ule(KnownLHS, KnownRHS); + break; + case ISD::SETGE: + Res = KnownBits::sge(KnownLHS, KnownRHS); + break; + case ISD::SETUGE: + Res = KnownBits::uge(KnownLHS, KnownRHS); + break; + default: + break; + } + + if (Res) + return DAG.getBoolConstant(*Res, dl, VT, OpVT); + + // We aren't able to constant fold with known bits but can either 1) make + // conditions stronger (i.e ule -> ult) or 2) simplify with + // isKnownNeverZero if RHS is zero. + switch (Cond) { + case ISD::SETLE: + case ISD::SETULE: + case ISD::SETGE: + case ISD::SETUGE: + Res = KnownBits::eq(KnownLHS, KnownRHS); + [[fallthrough]]; + case ISD::SETEQ: + case ISD::SETNE: + // isKnownNeverZero is able to prove cases computeKnownBits can't.
+ if (!Res && KnownRHS.isZero() && DAG.isKnownNeverZero(N0)) + Res = false; + break; + default: + break; + } + + if (Res) { + assert(*Res == false && + "There is a bug in KnownBits::{sge,uge,sle,ule}"); + ISD::CondCode NewCond = Cond; + // NB: We could remove this switch and just do `Cond ^ ISD::SETEQ` for + // the new opcode. + switch (Cond) { + // Remove the or eq portion of the condition. + case ISD::SETULE: + NewCond = ISD::SETULT; + break; + case ISD::SETLE: + NewCond = ISD::SETLT; + break; + case ISD::SETUGE: + NewCond = ISD::SETUGT; + break; + case ISD::SETGE: + NewCond = ISD::SETGT; + break; + // Evaluate to true/false. + case ISD::SETNE: + return DAG.getBoolConstant(true, dl, VT, OpVT); + case ISD::SETEQ: + return DAG.getBoolConstant(false, dl, VT, OpVT); + default: + break; + } + if (Cond != NewCond) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + } + } + if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) return V; diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -20,10 +20,7 @@ define i8 @test2(i32 %a) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 -; CHECK-NEXT: and w8, w0, w8 -; CHECK-NEXT: cmp w8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i32 %a, 135 @@ -37,7 +34,7 @@ define i8 @test3(i32 %a) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: cmp w8, #1024 @@ -68,10 +65,7 @@ define i8 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and x8, x0, #0x3ffffc000 -; CHECK-NEXT: and x8, x8, #0xfffffffe00007fff -; CHECK-NEXT: cmp x8, 
#1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 8589950976 @@ -84,10 +78,7 @@ define i8 @test6(i64 %a) { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 -; CHECK-NEXT: and x8, x0, x8 -; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 135 @@ -101,7 +92,7 @@ define i8 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: cmp x8, #1024 @@ -175,7 +166,7 @@ ; CHECK-NEXT: cmp w2, #1 ; CHECK-NEXT: b.lt .LBB8_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: movk w9, #32, lsl #16 ; CHECK-NEXT: .LBB8_2: // %for.body @@ -226,7 +217,7 @@ ; CHECK-LABEL: test10: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr w8, [x1] -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: movk w9, #32, lsl #16 ; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: str w8, [x0] @@ -253,10 +244,7 @@ define i8 @test11(i64 %a) { ; CHECK-LABEL: test11: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1610612736 -; CHECK-NEXT: and x8, x0, x8 -; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %and = and i64 %a, 2684354560 diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll --- a/llvm/test/CodeGen/AArch64/andcompare.ll +++ b/llvm/test/CodeGen/AArch64/andcompare.ll @@ -2451,7 +2451,7 @@ ; ; GISEL-LABEL: cmp_to_ands3: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #23 +; GISEL-NEXT: mov w8, #23 // =0x17 ; GISEL-NEXT: and w8, w0, w8 ; GISEL-NEXT: cmp w8, #7 ; GISEL-NEXT: csel w0, w1, wzr, hi @@ -2466,8 +2466,8 @@ ; SDISEL-LABEL: cmp_to_ands4: ; SDISEL: // %bb.0: ; 
SDISEL-NEXT: and w8, w0, #0x30 -; SDISEL-NEXT: tst w0, #0x20 -; SDISEL-NEXT: csel w0, w8, w1, eq +; SDISEL-NEXT: cmp w8, #31 +; SDISEL-NEXT: csel w0, w8, w1, lo ; SDISEL-NEXT: ret ; ; GISEL-LABEL: cmp_to_ands4: diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -289,7 +289,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_x_is_const_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43605 +; CHECK-NEXT: mov w8, #43605 // =0xaa55 ; CHECK-NEXT: movk w8, #43605, lsl #16 ; CHECK-NEXT: lsl w8, w8, w0 ; CHECK-NEXT: tst w8, #0x1 @@ -303,8 +303,8 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_x_is_const2_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: mov w9, #43605 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #43605 // =0xaa55 ; CHECK-NEXT: lsl w8, w8, w0 ; CHECK-NEXT: movk w9, #43605, lsl #16 ; CHECK-NEXT: tst w8, w9 @@ -319,7 +319,7 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_slt: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 +; CHECK-NEXT: mov w8, #24 // =0x18 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w8, w1 ; CHECK-NEXT: and w8, w8, w0 @@ -334,13 +334,7 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: and w8, w8, #0x80 -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x diff --git 
a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll --- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -55,9 +55,7 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, x1, lsl #17 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %shl = shl i128 %a, 17 %cmp = icmp eq i128 %shl, 0 @@ -67,9 +65,7 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, x1, lsl #17 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %shl = shl i128 %a, 17 %cmp = icmp ne i128 %shl, 0 @@ -80,15 +76,13 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: lsl x8, x0, #17 ; CHECK-NEXT: extr x1, x1, x0, #47 -; CHECK-NEXT: lsl x0, x0, #17 -; CHECK-NEXT: orr x8, x0, x1 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w19, eq +; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: bl use -; CHECK-NEXT: mov w0, w19 -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %shl = shl i128 %a, 17 %cmp = icmp eq i128 %shl, 0 @@ -101,9 +95,7 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x1, x0, lsl #17 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %shl.a = shl i64 %a, 17 %srl.b = lshr i64 %b, 47 @@ -119,10 +111,7 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_wrong_shifts: ; CHECK: // %bb.0: -; CHECK-NEXT: extr x8, x0, x1, #47 -; CHECK-NEXT: orr x8, x8, x1, lsl #18 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %shl.a = shl i64 %a, 17 %srl.b = lshr i64 %b, 47 @@ -136,14 +125,7 @@ define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero_i256: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x2, x0 -; CHECK-NEXT: extr x9, x3, x2, #47 -; CHECK-NEXT: extr x10, x1, x0, #47 -; CHECK-NEXT: extr x8, x8, x1, #47 -; CHECK-NEXT: orr x9, x10, x9 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %shl = shl i256 %a, 17 %cmp = icmp ne i256 %shl, 0 diff --git a/llvm/test/CodeGen/AArch64/pr59902.ll b/llvm/test/CodeGen/AArch64/pr59902.ll --- a/llvm/test/CodeGen/AArch64/pr59902.ll +++ b/llvm/test/CodeGen/AArch64/pr59902.ll @@ -6,13 +6,7 @@ define i1 @test() { ; CHECK-LABEL: test: ; CHECK: // %bb.0: -; 
CHECK-NEXT: mov x8, #9007199254740990 -; CHECK-NEXT: movk x8, #65503, lsl #16 -; CHECK-NEXT: movk x8, #65407, lsl #32 -; CHECK-NEXT: cmp x8, x8 -; CHECK-NEXT: csel x9, x8, x8, gt -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %1 = select i1 false, i64 0, i64 9006649496829950 %2 = call i64 @llvm.smax.i64(i64 %1, i64 9006649496829950) diff --git a/llvm/test/CodeGen/AArch64/setcc-fsh.ll b/llvm/test/CodeGen/AArch64/setcc-fsh.ll --- a/llvm/test/CodeGen/AArch64/setcc-fsh.ll +++ b/llvm/test/CodeGen/AArch64/setcc-fsh.ll @@ -9,9 +9,7 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1, lsl #5 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %or = or i32 %x, %y %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 5) @@ -22,9 +20,7 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1, lsl #5 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %or = or i32 %y, %x %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 5) @@ -63,9 +59,7 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1, lsl #8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %or = or i16 %x, %y %f = call i16 @llvm.fshr.i16(i16 %or, i16 %x, i16 8) @@ -76,9 +70,7 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_commute_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1, lsl #8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %or = or i16 %y, %x %f = call i16 @llvm.fshr.i16(i16 %or, i16 %x, i16 8) @@ -102,9 +94,7 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_ne_0: ; CHECK: // %bb.0: -; 
CHECK-NEXT: orr w8, w0, w1, lsl #7 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %or = or i32 %x, %y %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 7) @@ -115,9 +105,7 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1, lsl #7 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %or = or i32 %y, %x %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 7) @@ -156,9 +144,7 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, x1, lsl #63 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %or = or i64 %x, %y %f = call i64 @llvm.fshr.i64(i64 %or, i64 %x, i64 1) @@ -169,9 +155,7 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x0, x1, lsl #63 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret %or = or i64 %y, %x %f = call i64 @llvm.fshr.i64(i64 %or, i64 %x, i64 1) diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -20,7 +20,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI1_0 @@ -39,7 +39,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, 
lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI2_0 @@ -58,7 +58,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; CHECK-LABEL: t2_narrow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: dup v1.8h, w8 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h @@ -76,7 +76,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-LABEL: t3_wide: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: movk x8, #43691 ; CHECK-NEXT: mov x10, v0.d[1] diff --git a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll @@ -68,9 +68,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: csel x0, x1, x0, gt ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -306,9 +304,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: csel w8, w1, w0, gt ; CHECK-NEXT: sxtw x0, w8 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -670,9 +670,7 @@ ; SI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_or_b32_e32 v0, v0, v1 -; SI-NEXT: v_ffbl_b32_e32 v1, v0 -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; SI-NEXT: v_cndmask_b32_e32 v0, 32, v1, vcc +; SI-NEXT: v_ffbl_b32_e32 v0, v0 ; SI-NEXT: 
buffer_store_short v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -692,9 +690,7 @@ ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: v_ffbl_b32_e32 v1, v0 -; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 -; VI-NEXT: v_cndmask_b32_e32 v2, 32, v1, vcc +; VI-NEXT: v_ffbl_b32_e32 v2, v0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_short v[0:1], v2 @@ -987,30 +983,22 @@ ; ; EG-LABEL: v_cttz_zero_undef_i64_with_select: ; EG: ; %bb.0: -; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] -; EG-NEXT: TEX 3 @6 -; EG-NEXT: ALU 12, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: Fetch clause starting at 6: -; EG-NEXT: VTX_READ_16 T1.X, T0.X, 6, #1 -; EG-NEXT: VTX_READ_16 T2.X, T0.X, 0, #1 -; EG-NEXT: VTX_READ_16 T3.X, T0.X, 4, #1 -; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1 -; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 +; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: ; EG-NEXT: MOV * T0.X, KC0[2].Z, -; EG-NEXT: ALU clause starting at 15: -; EG-NEXT: LSHL T0.W, T1.X, literal.x, -; EG-NEXT: LSHL * T1.W, T0.X, literal.x, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: LSHL * T0.W, T1.X, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) -; EG-NEXT: OR_INT * T0.W, PV.W, T3.X, -; EG-NEXT: FFBL_INT T0.W, PV.W, -; EG-NEXT: OR_INT * T1.W, T1.W, T2.X, -; EG-NEXT: FFBL_INT T2.W, PS, -; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x, -; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) -; EG-NEXT: CNDE_INT T0.X, T1.W, PS, PV.W, +; EG-NEXT: OR_INT * T0.W, PV.W, T0.X, +; EG-NEXT: FFBL_INT T0.X, PV.W, ; EG-NEXT: MOV T0.Y, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) @@ -1130,7 +1118,7 @@ ; EG: ; %bb.0: ; EG-NEXT: 
ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 1 @6 -; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -1143,10 +1131,9 @@ ; EG-NEXT: LSHL * T0.W, T1.X, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: OR_INT * T0.W, PV.W, T0.X, -; EG-NEXT: FFBL_INT * T1.W, PV.W, -; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W, -; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; EG-NEXT: -1(nan), 2(2.802597e-45) +; EG-NEXT: FFBL_INT T0.X, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; GFX9-GISEL-LABEL: v_cttz_i32_sel_eq_neg1: ; GFX9-GISEL: ; %bb.0: @@ -1250,7 +1237,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 1 @6 -; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -1263,10 +1250,9 @@ ; EG-NEXT: LSHL * T0.W, T1.X, literal.x, ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) ; EG-NEXT: OR_INT * T0.W, PV.W, T0.X, -; EG-NEXT: FFBL_INT * T1.W, PV.W, -; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W, -; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, -; EG-NEXT: -1(nan), 2(2.802597e-45) +; EG-NEXT: FFBL_INT T0.X, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; GFX9-GISEL-LABEL: v_cttz_i32_sel_ne_neg1: ; GFX9-GISEL: ; %bb.0: @@ -1546,14 +1532,11 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: flat_load_ubyte v2, v[2:3] ; VI-NEXT: flat_load_ubyte v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v1, 0xffff ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 +; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v2 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_e32 v0, v2, v0 +; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: v_ffbl_b32_e32 v2, v0 -; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 -; VI-NEXT: 
v_cndmask_b32_e32 v2, v1, v2, vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_short v[0:1], v2 diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/fshl.ll @@ -698,84 +698,64 @@ define amdgpu_kernel void @orxor2or1(ptr addrspace(1) %in, i32 %a, i32 %b) { ; SI-LABEL: orxor2or1: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_load_dword s4, s[0:1], 0xc +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_lshl_b32 s0, s2, 7 -; SI-NEXT: s_or_b32 s0, s3, s0 -; SI-NEXT: s_cmp_eq_u32 s0, 0 -; SI-NEXT: s_cselect_b32 s0, s2, s3 -; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: orxor2or1: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-NEXT: s_load_dword s0, s[0:1], 0x30 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_lshl_b32 s4, s2, 7 -; VI-NEXT: s_or_b32 s4, s3, s4 -; VI-NEXT: s_cmp_eq_u32 s4, 0 -; VI-NEXT: s_cselect_b32 s2, s2, s3 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: orxor2or1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x30 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b32 s4, s2, 7 -; 
GFX9-NEXT: s_or_b32 s4, s3, s4 -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_cselect_b32 s2, s2, s3 -; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm ; ; R600-LABEL: orxor2or1: ; R600: ; %bb.0: -; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] -; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: -; R600-NEXT: LSHL * T0.W, KC0[2].Z, literal.x, -; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) -; R600-NEXT: OR_INT * T0.W, KC0[2].W, PV.W, -; R600-NEXT: CNDE_INT T0.X, PV.W, KC0[2].Z, KC0[2].W, -; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, +; R600-NEXT: MOV * T1.X, KC0[2].W, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; GFX10-LABEL: orxor2or1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dword s4, s[0:1], 0x30 +; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshl_b32 s4, s2, 7 -; GFX10-NEXT: s_or_b32 s4, s3, s4 -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s2, s2, s3 -; GFX10-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10-NEXT: v_mov_b32_e32 v1, s4 +; GFX10-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: orxor2or1: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x30 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_lshl_b32 s4, s2, 7 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; 
GFX11-NEXT: s_or_b32 s4, s3, s4 -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s2, s2, s3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll --- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll @@ -176,16 +176,12 @@ ; CHECK-NEXT: .LBB7_1: @ %atomicrmw.start ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrexd r4, r5, [r0] -; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: subs r1, r2, r4 ; CHECK-NEXT: sbcs r1, r3, r5 -; CHECK-NEXT: orr r1, r4, r5 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: movwlo r12, #1 -; CHECK-NEXT: lsr r1, r1, #5 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlo r1, #1 ; CHECK-NEXT: subs r6, r4, #1 ; CHECK-NEXT: sbc r7, r5, #0 -; CHECK-NEXT: orr r1, r1, r12 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: movne r7, r3 ; CHECK-NEXT: movne r6, r2 diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -204,10 +204,9 @@ define i32 @f13(i32 %x, i32 %y) { ; CHECK-LABEL: f13: ; CHECK: @ %bb.0: -; CHECK-NEXT: and r2, r0, #4 -; CHECK-NEXT: bic r0, r1, #255 -; CHECK-NEXT: cmp r2, #42 -; CHECK-NEXT: orrne r0, r0, #16 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, #16 +; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: bx lr %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 diff --git a/llvm/test/CodeGen/ARM/cmp-peephole.ll b/llvm/test/CodeGen/ARM/cmp-peephole.ll --- a/llvm/test/CodeGen/ARM/cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/cmp-peephole.ll @@ -137,23 +137,17 @@ define i1 @cmp_ne_zero_or_ri(i32 %a) { ; ARM-LABEL: cmp_ne_zero_or_ri: ; ARM: @ %bb.0: -; ARM-NEXT: orrs r0, r0, #42 -; ARM-NEXT: movwne r0, #1 +; ARM-NEXT: 
mov r0, #1 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_ne_zero_or_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #42 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: sbcs r0, r1 +; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_ne_zero_or_ri: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: orrs r0, r0, #42 -; THUMB2-NEXT: it ne -; THUMB2-NEXT: movne r0, #1 +; THUMB2-NEXT: movs r0, #1 ; THUMB2-NEXT: bx lr %or = or i32 %a, 42 %res = icmp ne i32 %or, 0 @@ -163,24 +157,17 @@ define i1 @cmp_ne_zero_or_rsr(i32 %a, i32 %b, i32 %c) { ; ARM-LABEL: cmp_ne_zero_or_rsr: ; ARM: @ %bb.0: -; ARM-NEXT: orrs r0, r0, r1, lsl r2 -; ARM-NEXT: movwne r0, #1 +; ARM-NEXT: mov r0, #1 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_ne_zero_or_rsr: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r1, r2 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: sbcs r0, r1 +; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_ne_zero_or_rsr: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r1, r2 -; THUMB2-NEXT: orrs r0, r1 -; THUMB2-NEXT: it ne -; THUMB2-NEXT: movne r0, #1 +; THUMB2-NEXT: movs r0, #1 ; THUMB2-NEXT: bx lr %sh = shl i32 %b, %c %or = or i32 %sh, %a @@ -433,22 +420,17 @@ define i1 @cmp_ne_zero_shl_rr(i32 %a, i32 %b) { ; ARM-LABEL: cmp_ne_zero_shl_rr: ; ARM: @ %bb.0: -; ARM-NEXT: lsls r0, r0, r1 -; ARM-NEXT: movwne r0, #1 +; ARM-NEXT: mov r0, #1 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_ne_zero_shl_rr: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r0, r1 -; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: sbcs r0, r1 +; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_ne_zero_shl_rr: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r1 -; THUMB2-NEXT: it ne -; THUMB2-NEXT: movne r0, #1 +; THUMB2-NEXT: movs r0, #1 ; THUMB2-NEXT: bx lr %sh = shl i32 %a, %b %cmp = icmp ne i32 %sh, 0 @@ -458,22 +440,17 @@ define i1 @cmp_ne_zero_shl_ri(i32 %a) { ; ARM-LABEL: cmp_ne_zero_shl_ri: ; ARM: @ %bb.0: -; ARM-NEXT: lsls r0, r0, #7 -; ARM-NEXT: movwne r0, #1 +; ARM-NEXT: mov 
r0, #1 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_ne_zero_shl_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r0, r0, #7 -; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: sbcs r0, r1 +; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_ne_zero_shl_ri: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r0, #7 -; THUMB2-NEXT: it ne -; THUMB2-NEXT: movne r0, #1 +; THUMB2-NEXT: movs r0, #1 ; THUMB2-NEXT: bx lr %sh = shl i32 %a, 7 %cmp = icmp ne i32 %sh, 0 @@ -726,10 +703,7 @@ ; ; THUMB-LABEL: cmp_eq_zero_or_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #42 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: rsbs r1, r0, #0 -; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: movs r0, #0 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_eq_zero_or_ri: @@ -744,25 +718,17 @@ define i1 @cmp_eq_zero_or_rsr(i32 %a, i32 %b, i32 %c) { ; ARM-LABEL: cmp_eq_zero_or_rsr: ; ARM: @ %bb.0: -; ARM-NEXT: orr r0, r0, r1, lsl r2 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_eq_zero_or_rsr: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r1, r2 -; THUMB-NEXT: orrs r0, r1 -; THUMB-NEXT: rsbs r1, r0, #0 -; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: movs r0, #0 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_eq_zero_or_rsr: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r1, r2 -; THUMB2-NEXT: orrs r0, r1 -; THUMB2-NEXT: clz r0, r0 -; THUMB2-NEXT: lsrs r0, r0, #5 +; THUMB2-NEXT: movs r0, #0 ; THUMB2-NEXT: bx lr %sh = shl i32 %b, %c %or = or i32 %sh, %a @@ -1024,23 +990,17 @@ define i1 @cmp_eq_zero_shl_rr(i32 %a, i32 %b) { ; ARM-LABEL: cmp_eq_zero_shl_rr: ; ARM: @ %bb.0: -; ARM-NEXT: lsl r0, r0, r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_eq_zero_shl_rr: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r0, r1 -; THUMB-NEXT: rsbs r1, r0, #0 -; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: movs r0, #0 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_eq_zero_shl_rr: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r1 -; THUMB2-NEXT: clz r0, r0 
-; THUMB2-NEXT: lsrs r0, r0, #5 +; THUMB2-NEXT: movs r0, #0 ; THUMB2-NEXT: bx lr %sh = shl i32 %a, %b %cmp = icmp eq i32 %sh, 0 @@ -1050,23 +1010,17 @@ define i1 @cmp_eq_zero_shl_ri(i32 %a) { ; ARM-LABEL: cmp_eq_zero_shl_ri: ; ARM: @ %bb.0: -; ARM-NEXT: lsl r0, r0, #7 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: cmp_eq_zero_shl_ri: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r1, r0, #7 -; THUMB-NEXT: rsbs r0, r1, #0 -; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: movs r0, #0 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: cmp_eq_zero_shl_ri: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r0, #7 -; THUMB2-NEXT: clz r0, r0 -; THUMB2-NEXT: lsrs r0, r0, #5 +; THUMB2-NEXT: movs r0, #0 ; THUMB2-NEXT: bx lr %sh = shl i32 %a, 7 %cmp = icmp eq i32 %sh, 0 diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -966,48 +966,15 @@ ;------------------------------------------------------------------------------; define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { -; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mov r2, #24 -; ARM6-NEXT: ands r0, r0, r2, lsr r1 -; ARM6-NEXT: mov r0, #0 -; ARM6-NEXT: movmi r0, #1 -; ARM6-NEXT: bx lr -; -; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mov r2, #24 -; ARM78-NEXT: ands r0, r0, r2, lsr r1 -; ARM78-NEXT: mov r0, #0 -; ARM78-NEXT: movwmi r0, #1 -; ARM78-NEXT: bx lr -; -; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: bmi .LBB20_2 -; THUMB6-NEXT: @ %bb.1: -; 
THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bx lr -; THUMB6-NEXT: .LBB20_2: -; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: bx lr +; ARM-LABEL: negative_scalar_i8_bitsinmiddle_slt: +; ARM: @ %bb.0: +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr ; -; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: mov.w r0, #0 -; THUMB78-NEXT: it mi -; THUMB78-NEXT: movmi r0, #1 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: negative_scalar_i8_bitsinmiddle_slt: +; THUMB: @ %bb.0: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr %t0 = lshr i8 24, %y %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -1067,23 +1067,10 @@ ; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; -; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: rsbs r0, r1, #0 -; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: bx lr -; -; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r0, #0 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: scalar_i8_signbit_eq_with_nonzero: +; THUMB: @ %bb.0: +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll --- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll @@ -54,9 +54,7 @@ 
define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: orr r0, r0, r1, lsl #17 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: lsr r0, r0, #5 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: bx lr %shl = shl i64 %a, 17 %cmp = icmp eq i64 %shl, 0 @@ -66,8 +64,7 @@ define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero: ; CHECK: @ %bb.0: -; CHECK-NEXT: orrs r0, r0, r1, lsl #17 -; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: bx lr %shl = shl i64 %a, 17 %cmp = icmp ne i64 %shl, 0 @@ -78,17 +75,13 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i64 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r11, lr} -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: lsl r0, r1, #17 -; CHECK-NEXT: orr r5, r0, r4, lsr #15 -; CHECK-NEXT: lsl r0, r4, #17 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: lsl r1, r1, #17 +; CHECK-NEXT: orr r1, r1, r0, lsr #15 +; CHECK-NEXT: lsl r0, r0, #17 ; CHECK-NEXT: bl use -; CHECK-NEXT: orr r0, r5, r4, lsl #17 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: pop {r4, r5, r11, pc} +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: pop {r11, pc} %shl = shl i64 %a, 17 %cmp = icmp eq i64 %shl, 0 call void @use(i64 %shl) @@ -100,9 +93,7 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts: ; CHECK: @ %bb.0: -; CHECK-NEXT: orr r0, r1, r0, lsl #17 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: lsr r0, r0, #5 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: bx lr %shl.a = shl i32 %a, 17 %srl.b = lshr i32 %b, 15 @@ -118,11 +109,7 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: opt_setcc_expanded_shl_wrong_shifts: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r0, r0, #17 -; CHECK-NEXT: orr r0, r0, r1, lsr #15 -; CHECK-NEXT: orr r0, r0, r1, lsl #18 -; 
CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: lsr r0, r0, #5 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: bx lr %shl.a = shl i32 %a, 17 %srl.b = lshr i32 %b, 15 @@ -136,13 +123,7 @@ define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: orr r3, r1, r3 -; CHECK-NEXT: orr r0, r2, r0 -; CHECK-NEXT: orr r2, r0, r3 -; CHECK-NEXT: orr r0, r0, r1 -; CHECK-NEXT: lsr r0, r0, #15 -; CHECK-NEXT: orrs r0, r0, r2, lsl #17 -; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: bx lr %shl = shl i128 %a, 17 %cmp = icmp ne i128 %shl, 0 diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll --- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll @@ -10,35 +10,10 @@ ; CHECK-LABEL: fred: ; CHECK: // %bb.0: // %b0 ; CHECK-NEXT: { -; CHECK-NEXT: if (p0) jump:nt .LBB0_2 -; CHECK-NEXT: } -; CHECK-NEXT: // %bb.1: // %b2 -; CHECK-NEXT: { -; CHECK-NEXT: r3:2 = combine(#0,#0) -; CHECK-NEXT: r1:0 = memd(r0+#0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r1:0 = mask(p0) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = cmp.eq(r0,#11) -; CHECK-NEXT: r0 = #1 -; CHECK-NEXT: } -; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) r0 = #1 ; CHECK-NEXT: if (p0) r0 = #0 ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: } -; CHECK-NEXT: .LBB0_2: // %b14 -; CHECK-NEXT: { -; CHECK-NEXT: r0 = #0 -; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: } b0: switch i32 undef, label %b14 [ i32 5, label %b2 diff --git a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll --- a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll +++ b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll @@ -8,193 +8,10 @@ define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 { ; 
CHECK-LABEL: SplitPromoteVectorTest: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1 -; CHECK-NEXT: plxv v4, .LCPI0_1@PCREL(0), 1 -; CHECK-NEXT: mtvsrws v3, r3 -; CHECK-NEXT: li r5, 12 -; CHECK-NEXT: li r8, 0 -; CHECK-NEXT: vcmpequw v2, v3, v2 -; CHECK-NEXT: plxv v5, .LCPI0_2@PCREL(0), 1 -; CHECK-NEXT: vcmpequw v4, v3, v4 -; CHECK-NEXT: vcmpequw v5, v3, v5 -; CHECK-NEXT: vextubrx r4, r5, v2 -; CHECK-NEXT: vextubrx r6, r5, v4 -; CHECK-NEXT: or r9, r6, r4 -; CHECK-NEXT: li r6, 4 -; CHECK-NEXT: vextubrx r4, r8, v5 -; CHECK-NEXT: vextubrx r7, r6, v5 -; CHECK-NEXT: rlwimi r4, r7, 1, 30, 30 -; CHECK-NEXT: li r7, 8 -; CHECK-NEXT: vextubrx r10, r7, v5 -; CHECK-NEXT: rlwimi r4, r10, 2, 29, 29 -; CHECK-NEXT: vextubrx r10, r5, v5 -; CHECK-NEXT: plxv v5, .LCPI0_3@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r10, 3, 28, 28 -; CHECK-NEXT: vcmpequw v5, v3, v5 -; CHECK-NEXT: vextubrx r10, r8, v5 -; CHECK-NEXT: rlwimi r4, r10, 4, 27, 27 -; CHECK-NEXT: vextubrx r10, r6, v5 -; CHECK-NEXT: rlwimi r4, r10, 5, 26, 26 -; CHECK-NEXT: vextubrx r10, r7, v5 -; CHECK-NEXT: rlwimi r4, r10, 6, 25, 25 -; CHECK-NEXT: vextubrx r10, r5, v5 -; CHECK-NEXT: plxv v5, .LCPI0_4@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r10, 7, 24, 24 -; CHECK-NEXT: vcmpequw v5, v3, v5 -; CHECK-NEXT: vextubrx r10, r8, v5 -; CHECK-NEXT: rlwimi r4, r10, 8, 23, 23 -; CHECK-NEXT: vextubrx r10, r6, v5 -; CHECK-NEXT: rlwimi r4, r10, 9, 22, 22 -; CHECK-NEXT: vextubrx r10, r7, v5 -; CHECK-NEXT: rlwimi r4, r10, 10, 21, 21 -; CHECK-NEXT: vextubrx r10, r5, v5 -; CHECK-NEXT: rlwimi r4, r10, 11, 20, 20 -; CHECK-NEXT: vextubrx r10, r8, v4 -; CHECK-NEXT: rlwimi r4, r10, 12, 19, 19 -; CHECK-NEXT: vextubrx r10, r6, v4 -; CHECK-NEXT: rlwimi r4, r10, 13, 18, 18 -; CHECK-NEXT: vextubrx r10, r7, v4 -; CHECK-NEXT: plxv v4, .LCPI0_5@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r10, 14, 17, 17 -; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16 -; CHECK-NEXT: vcmpequw v4, v3, v4 -; CHECK-NEXT: vextubrx r10, r8, v4 -; CHECK-NEXT: vextubrx r9, r6, v4 -; 
CHECK-NEXT: clrlwi r10, r10, 31 -; CHECK-NEXT: rlwimi r10, r9, 1, 30, 30 -; CHECK-NEXT: vextubrx r9, r7, v4 -; CHECK-NEXT: rlwimi r10, r9, 2, 29, 29 -; CHECK-NEXT: vextubrx r9, r5, v4 -; CHECK-NEXT: plxv v4, .LCPI0_6@PCREL(0), 1 -; CHECK-NEXT: rlwimi r10, r9, 3, 28, 28 -; CHECK-NEXT: vcmpequw v4, v3, v4 -; CHECK-NEXT: vextubrx r9, r8, v4 -; CHECK-NEXT: rlwimi r10, r9, 4, 27, 27 -; CHECK-NEXT: vextubrx r9, r6, v4 -; CHECK-NEXT: rlwimi r10, r9, 5, 26, 26 -; CHECK-NEXT: vextubrx r9, r7, v4 -; CHECK-NEXT: rlwimi r10, r9, 6, 25, 25 -; CHECK-NEXT: vextubrx r9, r5, v4 -; CHECK-NEXT: plxv v4, .LCPI0_7@PCREL(0), 1 -; CHECK-NEXT: rlwimi r10, r9, 7, 24, 24 -; CHECK-NEXT: vcmpequw v3, v3, v4 -; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: vextubrx r5, r5, v3 -; CHECK-NEXT: rlwimi r10, r9, 8, 23, 23 -; CHECK-NEXT: vextubrx r9, r6, v3 -; CHECK-NEXT: rlwimi r10, r9, 9, 22, 22 -; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: rlwimi r10, r9, 10, 21, 21 -; CHECK-NEXT: rlwimi r10, r5, 11, 20, 20 -; CHECK-NEXT: vextubrx r5, r8, v2 -; CHECK-NEXT: rlwimi r10, r5, 12, 19, 19 -; CHECK-NEXT: vextubrx r5, r6, v2 -; CHECK-NEXT: rlwimi r10, r5, 13, 18, 18 -; CHECK-NEXT: vextubrx r5, r7, v2 -; CHECK-NEXT: rlwimi r10, r5, 14, 17, 17 -; CHECK-NEXT: or r4, r4, r10 -; CHECK-NEXT: andi. 
r4, r4, 65535 -; CHECK-NEXT: iseleq r3, 0, r3 ; CHECK-NEXT: blr ; ; CHECK-AIX-LABEL: SplitPromoteVectorTest: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: ld 4, L..C0(2) # %const.0 -; CHECK-AIX-NEXT: mtvsrws 34, 3 -; CHECK-AIX-NEXT: li 8, 15 -; CHECK-AIX-NEXT: li 5, 11 -; CHECK-AIX-NEXT: lxv 35, 0(4) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 4, 8, 3 -; CHECK-AIX-NEXT: vextublx 6, 5, 3 -; CHECK-AIX-NEXT: clrlwi 4, 4, 31 -; CHECK-AIX-NEXT: rlwimi 4, 6, 1, 30, 30 -; CHECK-AIX-NEXT: li 6, 7 -; CHECK-AIX-NEXT: vextublx 7, 6, 3 -; CHECK-AIX-NEXT: rlwimi 4, 7, 2, 29, 29 -; CHECK-AIX-NEXT: li 7, 3 -; CHECK-AIX-NEXT: vextublx 9, 7, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 3, 28, 28 -; CHECK-AIX-NEXT: ld 9, L..C1(2) # %const.1 -; CHECK-AIX-NEXT: lxv 35, 0(9) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 9, 8, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 4, 27, 27 -; CHECK-AIX-NEXT: vextublx 9, 5, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 5, 26, 26 -; CHECK-AIX-NEXT: vextublx 9, 6, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 6, 25, 25 -; CHECK-AIX-NEXT: vextublx 9, 7, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 7, 24, 24 -; CHECK-AIX-NEXT: ld 9, L..C2(2) # %const.2 -; CHECK-AIX-NEXT: lxv 35, 0(9) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 9, 8, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 8, 23, 23 -; CHECK-AIX-NEXT: vextublx 9, 5, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 9, 22, 22 -; CHECK-AIX-NEXT: vextublx 9, 6, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 10, 21, 21 -; CHECK-AIX-NEXT: vextublx 9, 7, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 11, 20, 20 -; CHECK-AIX-NEXT: ld 9, L..C3(2) # %const.3 -; CHECK-AIX-NEXT: lxv 35, 0(9) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 9, 8, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 12, 19, 19 -; CHECK-AIX-NEXT: vextublx 9, 5, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 13, 18, 18 -; CHECK-AIX-NEXT: vextublx 9, 6, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 14, 17, 17 -; CHECK-AIX-NEXT: vextublx 9, 7, 3 -; CHECK-AIX-NEXT: rlwimi 4, 9, 15, 16, 16 -; 
CHECK-AIX-NEXT: ld 9, L..C4(2) # %const.4 -; CHECK-AIX-NEXT: lxv 35, 0(9) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 9, 8, 3 -; CHECK-AIX-NEXT: vextublx 10, 5, 3 -; CHECK-AIX-NEXT: clrlwi 9, 9, 31 -; CHECK-AIX-NEXT: rlwimi 9, 10, 1, 30, 30 -; CHECK-AIX-NEXT: vextublx 10, 6, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 2, 29, 29 -; CHECK-AIX-NEXT: vextublx 10, 7, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 3, 28, 28 -; CHECK-AIX-NEXT: ld 10, L..C5(2) # %const.5 -; CHECK-AIX-NEXT: lxv 35, 0(10) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 10, 8, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 4, 27, 27 -; CHECK-AIX-NEXT: vextublx 10, 5, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 5, 26, 26 -; CHECK-AIX-NEXT: vextublx 10, 6, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 6, 25, 25 -; CHECK-AIX-NEXT: vextublx 10, 7, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 7, 24, 24 -; CHECK-AIX-NEXT: ld 10, L..C6(2) # %const.6 -; CHECK-AIX-NEXT: lxv 35, 0(10) -; CHECK-AIX-NEXT: vcmpequw 3, 2, 3 -; CHECK-AIX-NEXT: vextublx 10, 8, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 8, 23, 23 -; CHECK-AIX-NEXT: vextublx 10, 5, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 9, 22, 22 -; CHECK-AIX-NEXT: vextublx 10, 6, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 10, 21, 21 -; CHECK-AIX-NEXT: vextublx 10, 7, 3 -; CHECK-AIX-NEXT: rlwimi 9, 10, 11, 20, 20 -; CHECK-AIX-NEXT: ld 10, L..C7(2) # %const.7 -; CHECK-AIX-NEXT: lxv 35, 0(10) -; CHECK-AIX-NEXT: vcmpequw 2, 2, 3 -; CHECK-AIX-NEXT: vextublx 8, 8, 2 -; CHECK-AIX-NEXT: vextublx 5, 5, 2 -; CHECK-AIX-NEXT: rlwimi 9, 8, 12, 19, 19 -; CHECK-AIX-NEXT: rlwimi 9, 5, 13, 18, 18 -; CHECK-AIX-NEXT: vextublx 5, 6, 2 -; CHECK-AIX-NEXT: rlwimi 9, 5, 14, 17, 17 -; CHECK-AIX-NEXT: vextublx 5, 7, 2 -; CHECK-AIX-NEXT: rlwimi 9, 5, 15, 16, 16 -; CHECK-AIX-NEXT: or 4, 9, 4 -; CHECK-AIX-NEXT: andi. 
4, 4, 65535 -; CHECK-AIX-NEXT: iseleq 3, 0, 3 ; CHECK-AIX-NEXT: blr entry: %0 = insertelement <32 x i32> poison, i32 %Opc, i64 0 diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll --- a/llvm/test/CodeGen/RISCV/div-pow2.ll +++ b/llvm/test/CodeGen/RISCV/div-pow2.ll @@ -209,12 +209,10 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 31 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: srai a1, a1, 1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_2: @@ -265,12 +263,10 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 21 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: srai a1, a1, 11 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_2048: @@ -322,12 +318,10 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 20 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: srai a1, a1, 12 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_4096: @@ -379,12 +373,10 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sdiv64_pow2_negative_65536: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -923,9 +923,9 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr noalias nocapture noundef readonly %arg1, i64 noundef %arg2) { ; CHECK-LABEL: gather_no_scalar_remainder: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: beqz a2, .LBB14_3 +; CHECK-NEXT: bnez zero, .LBB14_3 ; CHECK-NEXT: # %bb.1: # %bb2 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: li a3, 5 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma ; CHECK-NEXT: .LBB14_2: # %bb4 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -409,14 +409,17 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: sraw a0, a0, a1 +; CHECK-NEXT: li s0, 1 ; CHECK-NEXT: .LBB7_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call foo@plt ; CHECK-NEXT: ori a0, a0, -256 -; CHECK-NEXT: bnez a0, .LBB7_1 +; CHECK-NEXT: bnez s0, .LBB7_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; @@ -424,15 +427,18 @@ ; NOREMOVAL: # %bb.0: # %bb ; NOREMOVAL-NEXT: addi sp, sp, -16 ; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; NOREMOVAL-NEXT: sraw a0, a0, a1 +; NOREMOVAL-NEXT: li s0, 1 ; NOREMOVAL-NEXT: .LBB7_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call foo@plt ; 
NOREMOVAL-NEXT: ori a0, a0, -256 -; NOREMOVAL-NEXT: bnez a0, .LBB7_1 +; NOREMOVAL-NEXT: bnez s0, .LBB7_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; NOREMOVAL-NEXT: addi sp, sp, 16 ; NOREMOVAL-NEXT: ret bb: diff --git a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll --- a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll +++ b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll @@ -4,25 +4,18 @@ define signext i16 @f(ptr %bp, ptr %ss) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset %esi, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %cond_next127 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl (%eax), %edx -; CHECK-NEXT: movl (%ecx), %esi ; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: andl $15, %esi -; CHECK-NEXT: addl %esi, (%ecx) -; CHECK-NEXT: cmpl $63, %edx -; CHECK-NEXT: jb .LBB0_1 +; CHECK-NEXT: addl %edx, (%eax) +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: popl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl entry: br label %cond_next127 diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -609,49 +609,16 @@ } define void @test7(<8 x i1> %mask) { -; KNL-LABEL: test7: -; KNL: ## %bb.0: ## %allocas -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: orb $85, %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; 
SKX-LABEL: test7: -; SKX: ## %bb.0: ## %allocas -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 -; SKX-NEXT: vpmovw2m %xmm0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: orb $85, %al -; SKX-NEXT: retq -; -; AVX512BW-LABEL: test7: -; AVX512BW: ## %bb.0: ## %allocas -; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: orb $85, %al -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: test7: -; AVX512DQ: ## %bb.0: ## %allocas -; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 -; AVX512DQ-NEXT: kmovw %k0, %eax -; AVX512DQ-NEXT: orb $85, %al -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq +; CHECK-LABEL: test7: +; CHECK: ## %bb.0: ## %allocas +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: retq ; ; X86-LABEL: test7: ; X86: ## %bb.0: ## %allocas -; X86-NEXT: vpsllw $15, %xmm0, %xmm0 -; X86-NEXT: vpmovw2m %xmm0, %k0 -; X86-NEXT: kmovd %k0, %eax -; X86-NEXT: orb $85, %al +; X86-NEXT: movb $1, %al +; X86-NEXT: testb %al, %al ; X86-NEXT: retl allocas: %a= or <8 x i1> %mask, @@ -2227,16 +2194,9 @@ ; ; KNL-LABEL: ktest_2: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpgtps (%rdi), %zmm0, %k1 -; KNL-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 -; KNL-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} -; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} -; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 -; KNL-NEXT: vcmpltps %zmm2, %zmm1, %k3 -; KNL-NEXT: korw %k3, %k2, %k2 -; KNL-NEXT: korw %k0, %k1, %k0 -; KNL-NEXT: kortestw %k2, %k0 -; KNL-NEXT: je LBB45_2 +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: testb %al, %al +; KNL-NEXT: jne LBB45_2 ; KNL-NEXT: ## %bb.1: ## %L1 ; KNL-NEXT: vmovaps %zmm0, (%rdi) ; KNL-NEXT: vmovaps %zmm1, 64(%rdi) @@ -2296,16 +2256,9 @@ ; ; AVX512DQ-LABEL: ktest_2: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vcmpgtps (%rdi), %zmm0, %k1 -; AVX512DQ-NEXT: vcmpgtps 64(%rdi), %zmm1, %k2 -; AVX512DQ-NEXT: vmovups 68(%rdi), 
%zmm2 {%k2} {z} -; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} -; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0 -; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm1, %k3 -; AVX512DQ-NEXT: korw %k3, %k2, %k2 -; AVX512DQ-NEXT: korw %k0, %k1, %k0 -; AVX512DQ-NEXT: kortestw %k2, %k0 -; AVX512DQ-NEXT: je LBB45_2 +; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: testb %al, %al +; AVX512DQ-NEXT: jne LBB45_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi) @@ -4713,39 +4666,14 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) { ; KNL-LABEL: ktest_6: ; KNL: ## %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0 -; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 -; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 -; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 -; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 -; KNL-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3 -; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 -; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0 -; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 +; KNL-NEXT: movb $1, %al +; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB77_1 ; KNL-NEXT: ## %bb.2: ## %exit -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB77_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: vzeroupper ; KNL-NEXT: callq 
_foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq @@ -4794,39 +4722,14 @@ ; ; AVX512DQ-LABEL: ktest_6: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm4, %ymm4 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpeqw %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 -; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 -; AVX512DQ-NEXT: kortestw %k0, %k0 +; AVX512DQ-NEXT: movb $1, %al +; AVX512DQ-NEXT: testb %al, %al ; AVX512DQ-NEXT: je LBB77_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit -; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB77_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq @@ -4873,37 +4776,14 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) { ; KNL-LABEL: ktest_7: ; KNL: ## %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; KNL-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 -; KNL-NEXT: vpcmpeqb %ymm5, 
%ymm0, %ymm0 -; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 -; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 -; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 -; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm2 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 -; KNL-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3 -; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; KNL-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 -; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm0 -; KNL-NEXT: vpor %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpmovmskb %ymm0, %eax -; KNL-NEXT: testl %eax, %eax +; KNL-NEXT: movb $1, %al +; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB78_1 ; KNL-NEXT: ## %bb.2: ## %exit -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; KNL-NEXT: LBB78_1: ## %bar ; KNL-NEXT: pushq %rax ; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: vzeroupper ; KNL-NEXT: callq _foo ; KNL-NEXT: addq $8, %rsp ; KNL-NEXT: retq @@ -4952,37 +4832,14 @@ ; ; AVX512DQ-LABEL: ktest_7: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm4, %ymm4 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm2 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm2 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, 
%ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpeqb %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512DQ-NEXT: vpternlogq $200, %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 -; AVX512DQ-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: vpmovmskb %ymm0, %eax -; AVX512DQ-NEXT: testl %eax, %eax +; AVX512DQ-NEXT: movb $1, %al +; AVX512DQ-NEXT: testb %al, %al ; AVX512DQ-NEXT: je LBB78_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit -; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB78_1: ## %bar ; AVX512DQ-NEXT: pushq %rax ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: callq _foo ; AVX512DQ-NEXT: addq $8, %rsp ; AVX512DQ-NEXT: retq diff --git a/llvm/test/CodeGen/X86/cmp-concat.ll b/llvm/test/CodeGen/X86/cmp-concat.ll --- a/llvm/test/CodeGen/X86/cmp-concat.ll +++ b/llvm/test/CodeGen/X86/cmp-concat.ll @@ -19,8 +19,7 @@ define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) { ; CHECK-LABEL: cmp_anybits_concat_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: setne %al +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: retq %zx = zext i32 %x to i64 %zy = zext i32 %y to i64 @@ -33,11 +32,7 @@ define i1 @cmp_anybits_concat_shl_shl_i16(i16 %x, i16 %y) { ; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movzwl %di, %eax -; CHECK-NEXT: movzwl %si, %ecx -; CHECK-NEXT: shlq $8, %rcx -; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %zx = zext i16 %x to i64 %zy = zext i16 %y to i64 @@ -51,11 +46,7 @@ define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) { ; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: movzwl %di, %eax -; CHECK-NEXT: movzwl %si, %ecx -; CHECK-NEXT: shlq $8, %rcx -; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %zx = zext i16 %x to i64 %zy = zext i16 %y to i64 @@ -93,21 +84,7 @@ 
define <2 x i64> @cmp_nobits_concat_v2i64(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: cmp_nobits_concat_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %xmm0, %rax -; CHECK-NEXT: pextrq $1, %xmm0, %rcx -; CHECK-NEXT: movq %xmm1, %rdx -; CHECK-NEXT: pextrq $1, %xmm1, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: orq %rcx, %rsi -; CHECK-NEXT: sete %dil -; CHECK-NEXT: negq %rdi -; CHECK-NEXT: movq %rdi, %xmm1 -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: orq %rax, %rdx -; CHECK-NEXT: sete %cl -; CHECK-NEXT: negq %rcx -; CHECK-NEXT: movq %rcx, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: retq %zx = zext <2 x i64> %x to <2 x i128> %zy = zext <2 x i64> %y to <2 x i128> diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -31,9 +31,9 @@ define i32 @test2(i32 %X, ptr %y) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl $536870911, (%rsi) # encoding: [0xf7,0x06,0xff,0xff,0xff,0x1f] -; CHECK-NEXT: # imm = 0x1FFFFFFF -; CHECK-NEXT: je .LBB1_2 # encoding: [0x74,A] +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] +; CHECK-NEXT: jne .LBB1_2 # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 ; CHECK-NEXT: # %bb.1: # %cond_true ; CHECK-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00] @@ -57,8 +57,9 @@ define i8 @test2b(i8 %X, ptr %y) nounwind { ; CHECK-LABEL: test2b: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testb $31, (%rsi) # encoding: [0xf6,0x06,0x1f] -; CHECK-NEXT: je .LBB2_2 # encoding: [0x74,A] +; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] +; CHECK-NEXT: jne .LBB2_2 # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1 ; CHECK-NEXT: # %bb.1: # %cond_true ; CHECK-NEXT: movb 
$1, %al # encoding: [0xb0,0x01] @@ -280,9 +281,7 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07] -; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2] +; CHECK-NEXT: movl %edx, %eax # encoding: [0x89,0xd0] ; CHECK-NEXT: retq # encoding: [0xc3] %s = lshr i32 %mask, 7 %tobool = icmp sgt i32 %s, -1 @@ -359,17 +358,9 @@ define void @test20(i32 %bf.load, i8 %x1, ptr %b_addr) { ; CHECK-LABEL: test20: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; CHECK-NEXT: testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00] -; CHECK-NEXT: # imm = 0xFFFFFF -; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0] -; CHECK-NEXT: movzbl %sil, %ecx # encoding: [0x40,0x0f,0xb6,0xce] -; CHECK-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] -; CHECK-NEXT: setne (%rdx) # encoding: [0x0f,0x95,0x02] -; CHECK-NEXT: testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00] -; CHECK-NEXT: # imm = 0xFFFFFF -; CHECK-NEXT: setne d(%rip) # encoding: [0x0f,0x95,0x05,A,A,A,A] -; CHECK-NEXT: # fixup A - offset: 3, value: d-4, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, (%rdx) # encoding: [0xc6,0x02,0x01] +; CHECK-NEXT: movb $1, d(%rip) # encoding: [0xc6,0x05,A,A,A,A,0x01] +; CHECK-NEXT: # fixup A - offset: 2, value: d-5, kind: reloc_riprel_4byte ; CHECK-NEXT: retq # encoding: [0xc3] %bf.shl = shl i32 %bf.load, 8 %bf.ashr = ashr exact i32 %bf.shl, 8 diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -33,21 +33,17 @@ ; CHECK-NEXT: movl (%rsi), %esi ; CHECK-NEXT: movq l@GOTPCREL(%rip), %r8 ; CHECK-NEXT: movl (%r8), %r8d -; CHECK-NEXT: movl %r8d, %r9d -; CHECK-NEXT: shll $7, %r9d -; CHECK-NEXT: sarl $7, %r9d -; CHECK-NEXT: 
negl %r9d +; CHECK-NEXT: shll $7, %r8d +; CHECK-NEXT: sarl $7, %r8d +; CHECK-NEXT: negl %r8d ; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: cmovel %esi, %r9d -; CHECK-NEXT: movzwl %dx, %r10d -; CHECK-NEXT: leal (%rcx,%r10,2), %ecx +; CHECK-NEXT: cmovel %esi, %r8d +; CHECK-NEXT: movzwl %dx, %r9d +; CHECK-NEXT: leal (%rcx,%r9,2), %ecx ; CHECK-NEXT: addl %edi, %ecx -; CHECK-NEXT: cmpl %r9d, %ecx +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: cmpl %r8d, %ecx ; CHECK-NEXT: sete %dil -; CHECK-NEXT: testl $33554431, %r8d # imm = 0x1FFFFFF -; CHECK-NEXT: sete %r8b -; CHECK-NEXT: orb %dil, %r8b -; CHECK-NEXT: movzbl %r8b, %edi ; CHECK-NEXT: movq e@GOTPCREL(%rip), %r8 ; CHECK-NEXT: movw %di, (%r8) ; CHECK-NEXT: notl %ecx diff --git a/llvm/test/CodeGen/X86/fold-rmw-ops.ll b/llvm/test/CodeGen/X86/fold-rmw-ops.ll --- a/llvm/test/CodeGen/X86/fold-rmw-ops.ll +++ b/llvm/test/CodeGen/X86/fold-rmw-ops.ll @@ -1352,9 +1352,11 @@ define void @or64_imm32_br() nounwind { ; CHECK-LABEL: or64_imm32_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orq $16777215, g64(%rip) # encoding: [0x48,0x81,0x0d,A,A,A,A,0xff,0xff,0xff,0x00] -; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: orl $16777215, g64(%rip) # encoding: [0x81,0x0d,A,A,A,A,0xff,0xff,0xff,0x00] +; CHECK-NEXT: # fixup A - offset: 2, value: g64-8, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0xFFFFFF +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1385,6 +1387,8 @@ ; CHECK-NEXT: orq $-2147483648, g64(%rip) # encoding: [0x48,0x81,0x0d,A,A,A,A,0x00,0x00,0x00,0x80] ; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; 
CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1412,8 +1416,10 @@ define void @or64_imm8_br() nounwind { ; CHECK-LABEL: or64_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orq $15, g64(%rip) # encoding: [0x48,0x83,0x0d,A,A,A,A,0x0f] -; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: orb $15, g64(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: # fixup A - offset: 2, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1442,6 +1448,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orq $-4, g64(%rip) # encoding: [0x48,0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1468,9 +1476,10 @@ define void @or32_imm_br() nounwind { ; CHECK-LABEL: or32_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orl $-2147483648, g32(%rip) # encoding: [0x81,0x0d,A,A,A,A,0x00,0x00,0x00,0x80] -; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte -; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: orb $-128, g32+3(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: (g32+3)-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1498,8 +1507,10 @@ define void @or32_imm8_br() nounwind { ; CHECK-LABEL: 
or32_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orl $15, g32(%rip) # encoding: [0x83,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: orb $15, g32(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] ; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1528,6 +1539,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orl $-4, g32(%rip) # encoding: [0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1554,9 +1567,10 @@ define void @or16_imm_br() nounwind { ; CHECK-LABEL: or16_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orw $-32768, g16(%rip) # encoding: [0x66,0x81,0x0d,A,A,A,A,0x00,0x80] -; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte -; CHECK-NEXT: # imm = 0x8000 +; CHECK-NEXT: orb $-128, g16+1(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: (g16+1)-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1583,8 +1597,10 @@ define void @or16_imm8_br() nounwind { ; CHECK-LABEL: or16_imm8_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: orw $15, g16(%rip) # encoding: [0x66,0x83,0x0d,A,A,A,A,0x0f] -; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: orb $15, g16(%rip) # encoding: [0x80,0x0d,A,A,A,A,0x0f] +; CHECK-NEXT: # fixup A 
- offset: 2, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1613,6 +1629,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orw $-4, g16(%rip) # encoding: [0x66,0x83,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 @@ -1641,6 +1659,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: orb $-4, g8(%rip) # encoding: [0x80,0x0d,A,A,A,A,0xfc] ; CHECK-NEXT: # fixup A - offset: 2, value: g8-5, kind: reloc_riprel_4byte +; CHECK-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK-NEXT: testb %al, %al # encoding: [0x84,0xc0] ; CHECK-NEXT: jne b # TAILCALL ; CHECK-NEXT: # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -792,23 +792,12 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movb $-128, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: andb {{[0-9]+}}(%esp), %al -; X86-NEXT: cmpb $1, %al -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al -; X64-NEXT: # kill: def $cl 
killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: andb %dil, %al -; X64-NEXT: cmpb $1, %al -; X64-NEXT: sete %al +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %t0 = shl i8 128, %y %t1 = and i8 %t0, %x diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -123,20 +123,12 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_eq_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shll $17, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_eq_zero: ; X64: # %bb.0: -; X64-NEXT: shlq $17, %rsi -; X64-NEXT: orq %rdi, %rsi -; X64-NEXT: sete %al +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %shl = shl i128 %a, 17 %cmp = icmp eq i128 %shl, 0 @@ -146,20 +138,12 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_ne_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shll $17, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: setne %al +; X86-NEXT: movb $1, %al ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_ne_zero: ; X64: # %bb.0: -; X64-NEXT: shlq $17, %rsi -; X64-NEXT: orq %rdi, %rsi -; X64-NEXT: setne %al +; X64-NEXT: movb $1, %al ; X64-NEXT: retq %shl = shl i128 %a, 17 %cmp = icmp ne i128 %shl, 0 @@ -170,8 +154,6 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ 
-181,35 +163,24 @@ ; X86-NEXT: shldl $17, %ecx, %edx ; X86-NEXT: shldl $17, %eax, %ecx ; X86-NEXT: shll $17, %eax -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: orl %edx, %ebx -; X86-NEXT: orl %edi, %ebx -; X86-NEXT: sete %bl ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %edx ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %eax ; X86-NEXT: calll use@PLT ; X86-NEXT: addl $16, %esp -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax ; X64-NEXT: shldq $17, %rdi, %rsi ; X64-NEXT: shlq $17, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: orq %rsi, %rax -; X64-NEXT: sete %bl ; X64-NEXT: callq use@PLT -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq %shl = shl i128 %a, 17 %cmp = icmp eq i128 %shl, 0 @@ -222,20 +193,12 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: shldl $17, %eax, %ecx -; X86-NEXT: sete %al +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X64: # %bb.0: -; X64-NEXT: shlq $17, %rdi -; X64-NEXT: orq %rsi, %rdi -; X64-NEXT: sete %al +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %shl.a = shl i64 %a, 17 %srl.b = lshr i64 %b, 47 @@ -251,28 +214,12 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind { ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl $17, %edx, %esi -; X86-NEXT: shldl $17, %ecx, %edx -; X86-NEXT: shldl $18, %eax, %ecx -; X86-NEXT: shll $18, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: sete %al -; X86-NEXT: popl %esi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts: ; X64: # %bb.0: -; X64-NEXT: shldq $17, %rsi, %rdi -; X64-NEXT: shlq $18, %rsi -; X64-NEXT: orq %rdi, %rsi -; X64-NEXT: sete %al +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %shl.a = shl i64 %a, 17 %srl.b = lshr i64 %b, 47 diff --git a/llvm/test/CodeGen/X86/legalize-shift.ll b/llvm/test/CodeGen/X86/legalize-shift.ll --- a/llvm/test/CodeGen/X86/legalize-shift.ll +++ b/llvm/test/CodeGen/X86/legalize-shift.ll @@ -5,14 +5,12 @@ define void @PR36250() nounwind { ; X86-LABEL: PR36250: ; X86: # %bb.0: -; X86-NEXT: cmpl $0, (%eax) -; X86-NEXT: sete (%eax) +; X86-NEXT: movb $0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: PR36250: ; X64: # %bb.0: -; X64-NEXT: cmpq $0, (%rax) -; X64-NEXT: sete (%rax) +; X64-NEXT: movb $0, (%rax) ; X64-NEXT: retq %1 = load i448, ptr undef %2 = sub i448 0, %1 diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -89,20 +89,12 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) { ; SSE-LABEL: allzeros_v32i8_sign: ; SSE: # %bb.0: -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: sete %al +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i8_sign: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: sete %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: xorl %eax, %eax ; AVX1-NEXT: retq ; ; AVX2-LABEL: 
allzeros_v32i8_sign: @@ -185,44 +177,17 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) { ; SSE-LABEL: allzeros_v64i8_sign: ; SSE: # %bb.0: -; SSE-NEXT: por %xmm3, %xmm1 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: sete %al +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: retq ; -; AVX1-LABEL: allzeros_v64i8_sign: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: sete %al -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: allzeros_v64i8_sign: -; AVX2: # %bb.0: -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: testl %eax, %eax -; AVX2-NEXT: sete %al -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX1OR2-LABEL: allzeros_v64i8_sign: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: xorl %eax, %eax +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v64i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovmskb %ymm0, %eax -; KNL-NEXT: testl %eax, %eax -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper +; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: retq ; ; SKX-LABEL: allzeros_v64i8_sign: @@ -488,25 +453,12 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) { ; SSE-LABEL: allzeros_v32i16_sign: ; SSE: # %bb.0: -; SSE-NEXT: packsswb %xmm3, %xmm2 -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: sete %al +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i16_sign: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpacksswb 
%xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: sete %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: xorl %eax, %eax ; AVX1-NEXT: retq ; ; AVX2-LABEL: allzeros_v32i16_sign: @@ -520,16 +472,7 @@ ; ; KNL-LABEL: allzeros_v32i16_sign: ; KNL: # %bb.0: -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; KNL-NEXT: vpor %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper +; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: retq ; ; SKX-LABEL: allzeros_v32i16_sign: diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -64,7 +64,7 @@ ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB -; CHECK-NEXT: cmpl $1431655766, %eax # imm = 0x55555556 +; CHECK-NEXT: cmpl $1431655765, %eax # imm = 0x55555555 ; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %t0 = and i32 %x, 2 ; clearly a power-of-two or zero diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll --- a/llvm/test/CodeGen/X86/or-with-overflow.ll +++ b/llvm/test/CodeGen/X86/or-with-overflow.ll @@ -10,20 +10,13 @@ ; X86-LABEL: or_i8_ri: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orb $-17, %cl -; X86-NEXT: je .LBB0_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB0_2: +; X86-NEXT: orb $-17, %al ; X86-NEXT: 
retl ; ; X64-LABEL: or_i8_ri: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: orb $-17, %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: cmovel %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %3 = or i8 %0, -17 @@ -60,14 +53,8 @@ define i16 @or_i16_ri(i16 zeroext %0, i16 zeroext %1) { ; X86-LABEL: or_i16_ri: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $65519, %ecx # imm = 0xFFEF -; X86-NEXT: testw %cx, %cx -; X86-NEXT: je .LBB2_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB2_2: +; X86-NEXT: movl $65519, %eax # imm = 0xFFEF +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -75,7 +62,6 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: orl $65519, %eax # imm = 0xFFEF -; X64-NEXT: cmovel %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %3 = or i16 %0, -17 @@ -114,19 +100,11 @@ ; X86-LABEL: or_i32_ri: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $-17, %ecx -; X86-NEXT: jle .LBB4_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB4_2: ; X86-NEXT: retl ; ; X64-LABEL: or_i32_ri: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: orl $-17, %eax -; X64-NEXT: cmovlel %edi, %eax ; X64-NEXT: retq %3 = or i32 %0, -17 %4 = icmp slt i32 %3, 1 @@ -161,19 +139,13 @@ define i64 @or_i64_ri(i64 %0, i64 %1) nounwind { ; X86-LABEL: or_i64_ri: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $17, %ecx -; X86-NEXT: cmpl $1, %ecx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: jl .LBB6_2 +; X86-NEXT: testl %edx, %edx +; X86-NEXT: js .LBB6_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: orl $17, %eax ; X86-NEXT: .LBB6_2: -; X86-NEXT: popl 
%esi ; X86-NEXT: retl ; ; X64-LABEL: or_i64_ri: diff --git a/llvm/test/CodeGen/X86/peep-test-4.ll b/llvm/test/CodeGen/X86/peep-test-4.ll --- a/llvm/test/CodeGen/X86/peep-test-4.ll +++ b/llvm/test/CodeGen/X86/peep-test-4.ll @@ -83,9 +83,13 @@ define void @shl(i32 %x) nounwind { ; CHECK-LABEL: shl: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB4_1 +; CHECK-NEXT: # %bb.2: # %bb ; CHECK-NEXT: addl %edi, %edi -; CHECK-NEXT: jne foo # TAILCALL -; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: jmp foo # TAILCALL +; CHECK-NEXT: .LBB4_1: # %return ; CHECK-NEXT: retq %shl = shl i32 %x, 1 %cmp = icmp eq i32 %shl, 0 @@ -102,9 +106,13 @@ define void @shli(i32 %x) nounwind { ; CHECK-LABEL: shli: ; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB5_1 +; CHECK-NEXT: # %bb.2: # %bb ; CHECK-NEXT: shll $4, %edi -; CHECK-NEXT: jne foo # TAILCALL -; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: jmp foo # TAILCALL +; CHECK-NEXT: .LBB5_1: # %return ; CHECK-NEXT: retq %shl = shl i32 %x, 4 %cmp = icmp eq i32 %shl, 0 diff --git a/llvm/test/CodeGen/X86/pr16031.ll b/llvm/test/CodeGen/X86/pr16031.ll --- a/llvm/test/CodeGen/X86/pr16031.ll +++ b/llvm/test/CodeGen/X86/pr16031.ll @@ -4,16 +4,7 @@ define i64 @main(i1 %tobool1) nounwind { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: decl %eax -; CHECK-NEXT: orl $-12, %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: addl $-1, %edx -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: adcl $-2, %edx -; CHECK-NEXT: cmovsl %ecx, %eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -60,7 +60,7 @@ ; GENERIC-NEXT: testb $1, %al ; GENERIC-NEXT: 
movl $-3840, %eax ## imm = 0xF100 ; GENERIC-NEXT: cmovnel %ecx, %eax -; GENERIC-NEXT: cmpl $32768, %eax ## imm = 0x8000 +; GENERIC-NEXT: cmpl $32767, %eax ## imm = 0x7FFF ; GENERIC-NEXT: jge LBB1_1 ; GENERIC-NEXT: ## %bb.2: ## %bb91 ; GENERIC-NEXT: xorl %eax, %eax @@ -77,7 +77,7 @@ ; ATOM-NEXT: movl $-3840, %edx ## imm = 0xF100 ; ATOM-NEXT: testb $1, %al ; ATOM-NEXT: cmovnel %ecx, %edx -; ATOM-NEXT: cmpl $32768, %edx ## imm = 0x8000 +; ATOM-NEXT: cmpl $32767, %edx ## imm = 0x7FFF ; ATOM-NEXT: jge LBB1_1 ; ATOM-NEXT: ## %bb.2: ## %bb91 ; ATOM-NEXT: xorl %eax, %eax @@ -94,7 +94,7 @@ ; ATHLON-NEXT: testb $1, %al ; ATHLON-NEXT: movl $-3840, %eax ## imm = 0xF100 ; ATHLON-NEXT: cmovnel %ecx, %eax -; ATHLON-NEXT: cmpl $32768, %eax ## imm = 0x8000 +; ATHLON-NEXT: cmpl $32767, %eax ## imm = 0x7FFF ; ATHLON-NEXT: jge LBB1_1 ; ATHLON-NEXT: ## %bb.2: ## %bb91 ; ATHLON-NEXT: xorl %eax, %eax @@ -112,7 +112,7 @@ ; MCU-NEXT: # %bb.1: # %entry ; MCU-NEXT: movl $-3840, %ecx # imm = 0xF100 ; MCU-NEXT: .LBB1_2: # %entry -; MCU-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; MCU-NEXT: cmpl $32767, %ecx # imm = 0x7FFF ; MCU-NEXT: jge .LBB1_3 ; MCU-NEXT: # %bb.4: # %bb91 ; MCU-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll --- a/llvm/test/CodeGen/X86/setcc-fsh.ll +++ b/llvm/test/CodeGen/X86/setcc-fsh.ll @@ -265,9 +265,7 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $5, %esi -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %or = or i32 %x, %y %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 5) @@ -278,9 +276,7 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $5, %esi -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %or = or i32 %y, %x %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 
5) @@ -319,9 +315,7 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $8, %esi -; CHECK-NEXT: orw %di, %si -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %or = or i16 %x, %y %f = call i16 @llvm.fshr.i16(i16 %or, i16 %x, i16 8) @@ -332,9 +326,7 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_commute_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $8, %esi -; CHECK-NEXT: orw %di, %si -; CHECK-NEXT: sete %al +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %or = or i16 %y, %x %f = call i16 @llvm.fshr.i16(i16 %or, i16 %x, i16 8) @@ -371,9 +363,7 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $7, %esi -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: setne %al +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: retq %or = or i32 %x, %y %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 7) @@ -384,9 +374,7 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $7, %esi -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: setne %al +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: retq %or = or i32 %y, %x %f = call i32 @llvm.fshl.i32(i32 %or, i32 %x, i32 7) @@ -429,9 +417,7 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shlq $63, %rsi -; CHECK-NEXT: orq %rdi, %rsi -; CHECK-NEXT: setne %al +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: retq %or = or i64 %x, %y %f = call i64 @llvm.fshr.i64(i64 %or, i64 %x, i64 1) @@ -442,9 +428,7 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_commute_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: shlq $63, %rsi -; CHECK-NEXT: orq %rdi, %rsi -; CHECK-NEXT: setne %al +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: retq %or = or i64 %y, %x %f = call i64 @llvm.fshr.i64(i64 %or, i64 %x, i64 1) diff --git a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll 
b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll --- a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll +++ b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll @@ -124,8 +124,8 @@ define dso_local void @test2_1(i32 %X) nounwind !prof !14 { ; CHECK-LABEL: test2_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: cmpl $256, %eax # imm = 0x100 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll --- a/llvm/test/CodeGen/X86/shrink-compare.ll +++ b/llvm/test/CodeGen/X86/shrink-compare.ll @@ -124,8 +124,8 @@ define dso_local void @test2_1(i32 %X) nounwind minsize { ; CHECK-LABEL: test2_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: cmpl $256, %eax # imm = 0x100 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je bar # TAILCALL ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1374,7 +1374,7 @@ ; SSE-NEXT: pmovmskb %xmm2, %eax ; SSE-NEXT: shll $16, %eax ; SSE-NEXT: orl %ecx, %eax -; SSE-NEXT: sete %dl +; SSE-NEXT: xorl %edx, %edx ; SSE-NEXT: retq ; ; AVX1-LABEL: test_v16i8_muti_uses: @@ -1385,7 +1385,7 @@ ; AVX1-NEXT: vpmovmskb %xmm1, %eax ; AVX1-NEXT: shll $16, %eax ; AVX1-NEXT: orl %ecx, %eax -; AVX1-NEXT: sete %dl +; AVX1-NEXT: xorl %edx, %edx ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16i8_muti_uses: diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll @@ -927,26 +927,12 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) { ; SSE-LABEL: icmp0_v32i8_v32i1: 
; SSE: # %bb.0: -; SSE-NEXT: pxor %xmm2, %xmm2 -; SSE-NEXT: pcmpeqb %xmm2, %xmm1 -; SSE-NEXT: pcmpeqb %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: icmp0_v32i8_v32i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: movb $1, %al ; AVX1-NEXT: retq ; ; AVX2-LABEL: icmp0_v32i8_v32i1: @@ -1149,35 +1135,12 @@ define i1 @icmp0_v32i16_v32i1(<32 x i16>) { ; SSE-LABEL: icmp0_v32i16_v32i1: ; SSE: # %bb.0: -; SSE-NEXT: pxor %xmm4, %xmm4 -; SSE-NEXT: pcmpeqw %xmm4, %xmm3 -; SSE-NEXT: pcmpeqw %xmm4, %xmm2 -; SSE-NEXT: packsswb %xmm3, %xmm2 -; SSE-NEXT: pcmpeqw %xmm4, %xmm1 -; SSE-NEXT: pcmpeqw %xmm4, %xmm0 -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: icmp0_v32i16_v32i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: movb $1, %al ; AVX1-NEXT: retq ; ; AVX2-LABEL: icmp0_v32i16_v32i1: @@ -1194,16 +1157,7 @@ ; ; AVX512F-LABEL: icmp0_v32i16_v32i1: ; AVX512F: # 
%bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kortestw %k0, %k0 -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: movb $1, %al ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: icmp0_v32i16_v32i1: @@ -1229,60 +1183,17 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) { ; SSE-LABEL: icmp0_v64i8_v64i1: ; SSE: # %bb.0: -; SSE-NEXT: pxor %xmm4, %xmm4 -; SSE-NEXT: pcmpeqb %xmm4, %xmm2 -; SSE-NEXT: pcmpeqb %xmm4, %xmm0 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pcmpeqb %xmm4, %xmm3 -; SSE-NEXT: pcmpeqb %xmm4, %xmm1 -; SSE-NEXT: por %xmm3, %xmm1 -; SSE-NEXT: por %xmm0, %xmm1 -; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; -; AVX1-LABEL: icmp0_v64i8_v64i1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: icmp0_v64i8_v64i1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: testl %eax, %eax -; AVX2-NEXT: setne %al -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX1OR2-LABEL: icmp0_v64i8_v64i1: +; 
AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: movb $1, %al +; AVX1OR2-NEXT: retq ; ; AVX512F-LABEL: icmp0_v64i8_v64i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovmskb %ymm0, %eax -; AVX512F-NEXT: testl %eax, %eax -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: movb $1, %al ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: icmp0_v64i8_v64i1: @@ -1728,25 +1639,12 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) { ; SSE-LABEL: icmp_v32i8_v32i1: ; SSE: # %bb.0: -; SSE-NEXT: pcmpeqb %xmm3, %xmm1 -; SSE-NEXT: pcmpeqb %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: icmp_v32i8_v32i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: movb $1, %al ; AVX1-NEXT: retq ; ; AVX2-LABEL: icmp_v32i8_v32i1: @@ -1945,35 +1843,12 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) { ; SSE-LABEL: icmp_v32i16_v32i1: ; SSE: # %bb.0: -; SSE-NEXT: pcmpeqw %xmm7, %xmm3 -; SSE-NEXT: pcmpeqw %xmm6, %xmm2 -; SSE-NEXT: packsswb %xmm3, %xmm2 -; SSE-NEXT: pcmpeqw %xmm5, %xmm1 -; SSE-NEXT: pcmpeqw %xmm4, %xmm0 -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; ; AVX1-LABEL: icmp_v32i16_v32i1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vextractf128 
$1, %ymm1, %xmm5 -; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: movb $1, %al ; AVX1-NEXT: retq ; ; AVX2-LABEL: icmp_v32i16_v32i1: @@ -1989,16 +1864,7 @@ ; ; AVX512F-LABEL: icmp_v32i16_v32i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kortestw %k0, %k0 -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: movb $1, %al ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: icmp_v32i16_v32i1: @@ -2024,59 +1890,17 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) { ; SSE-LABEL: icmp_v64i8_v64i1: ; SSE: # %bb.0: -; SSE-NEXT: pcmpeqb %xmm6, %xmm2 -; SSE-NEXT: pcmpeqb %xmm4, %xmm0 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pcmpeqb %xmm7, %xmm3 -; SSE-NEXT: pcmpeqb %xmm5, %xmm1 -; SSE-NEXT: por %xmm3, %xmm1 -; SSE-NEXT: por %xmm0, %xmm1 -; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al +; SSE-NEXT: movb $1, %al ; SSE-NEXT: retq ; -; AVX1-LABEL: icmp_v64i8_v64i1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm4 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm5 -; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, 
%ymm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: setne %al -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: icmp_v64i8_v64i1: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpmovmskb %ymm0, %eax -; AVX2-NEXT: testl %eax, %eax -; AVX2-NEXT: setne %al -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX1OR2-LABEL: icmp_v64i8_v64i1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: movb $1, %al +; AVX1OR2-NEXT: retq ; ; AVX512F-LABEL: icmp_v64i8_v64i1: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovmskb %ymm0, %eax -; AVX512F-NEXT: testl %eax, %eax -; AVX512F-NEXT: setne %al -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: movb $1, %al ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: icmp_v64i8_v64i1: diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll --- a/llvm/test/CodeGen/X86/xor.ll +++ b/llvm/test/CodeGen/X86/xor.ll @@ -57,33 +57,39 @@ define i32 @test4(i32 %a, i32 %b) nounwind { ; X86-LABEL: test4: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $1, %cl ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB3_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: notl %edx -; X86-NEXT: andl %ecx, %edx -; X86-NEXT: addl %edx, %edx -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: movl %eax, %esi +; 
X86-NEXT: notl %esi +; X86-NEXT: andl %edx, %esi +; X86-NEXT: addl %esi, %esi +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jne .LBB3_1 ; X86-NEXT: # %bb.2: # %bb12 +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LIN-LABEL: test4: ; X64-LIN: # %bb.0: # %entry ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: movb $1, %cl ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB3_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-LIN-NEXT: xorl %esi, %eax -; X64-LIN-NEXT: movl %eax, %ecx -; X64-LIN-NEXT: notl %ecx -; X64-LIN-NEXT: andl %esi, %ecx -; X64-LIN-NEXT: addl %ecx, %ecx -; X64-LIN-NEXT: movl %ecx, %esi +; X64-LIN-NEXT: movl %eax, %edx +; X64-LIN-NEXT: notl %edx +; X64-LIN-NEXT: andl %esi, %edx +; X64-LIN-NEXT: addl %edx, %edx +; X64-LIN-NEXT: testb %cl, %cl +; X64-LIN-NEXT: movl %edx, %esi ; X64-LIN-NEXT: jne .LBB3_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: retq @@ -91,15 +97,17 @@ ; X64-WIN-LABEL: test4: ; X64-WIN: # %bb.0: # %entry ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: movb $1, %cl ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB3_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-WIN-NEXT: xorl %edx, %eax -; X64-WIN-NEXT: movl %eax, %ecx -; X64-WIN-NEXT: notl %ecx -; X64-WIN-NEXT: andl %edx, %ecx -; X64-WIN-NEXT: addl %ecx, %ecx -; X64-WIN-NEXT: movl %ecx, %edx +; X64-WIN-NEXT: movl %eax, %r8d +; X64-WIN-NEXT: notl %r8d +; X64-WIN-NEXT: andl %edx, %r8d +; X64-WIN-NEXT: addl %r8d, %r8d +; X64-WIN-NEXT: testb %cl, %cl +; X64-WIN-NEXT: movl %r8d, %edx ; X64-WIN-NEXT: jne .LBB3_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq @@ -121,36 +129,40 @@ define i16 @test5(i16 %a, i16 %b) nounwind { ; X86-LABEL: test5: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $1, %cl ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB4_1: # %bb ; X86-NEXT: # 
=>This Inner Loop Header: Depth=1 -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: notl %edx -; X86-NEXT: andl %ecx, %edx -; X86-NEXT: addl %edx, %edx -; X86-NEXT: testw %dx, %dx -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: notl %esi +; X86-NEXT: andl %edx, %esi +; X86-NEXT: addl %esi, %esi +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jne .LBB4_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LIN-LABEL: test5: ; X64-LIN: # %bb.0: # %entry ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: movb $1, %cl ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB4_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-LIN-NEXT: xorl %esi, %eax -; X64-LIN-NEXT: movl %eax, %ecx -; X64-LIN-NEXT: notl %ecx -; X64-LIN-NEXT: andl %esi, %ecx -; X64-LIN-NEXT: addl %ecx, %ecx -; X64-LIN-NEXT: testw %cx, %cx -; X64-LIN-NEXT: movl %ecx, %esi +; X64-LIN-NEXT: movl %eax, %edx +; X64-LIN-NEXT: notl %edx +; X64-LIN-NEXT: andl %esi, %edx +; X64-LIN-NEXT: addl %edx, %edx +; X64-LIN-NEXT: testb %cl, %cl +; X64-LIN-NEXT: movl %edx, %esi ; X64-LIN-NEXT: jne .LBB4_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: # kill: def $ax killed $ax killed $eax @@ -160,16 +172,17 @@ ; X64-WIN: # %bb.0: # %entry ; X64-WIN-NEXT: # kill: def $dx killed $dx def $edx ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: movb $1, %cl ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB4_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-WIN-NEXT: xorl %edx, %eax -; X64-WIN-NEXT: movl %eax, %ecx -; X64-WIN-NEXT: notl %ecx -; X64-WIN-NEXT: andl %edx, %ecx -; X64-WIN-NEXT: addl %ecx, %ecx -; X64-WIN-NEXT: testw %cx, %cx -; X64-WIN-NEXT: movl %ecx, %edx +; X64-WIN-NEXT: movl %eax, %r8d +; X64-WIN-NEXT: notl %r8d +; X64-WIN-NEXT: andl %edx, %r8d +; X64-WIN-NEXT: addl %r8d, %r8d +; X64-WIN-NEXT: testb %cl, %cl +; 
X64-WIN-NEXT: movl %r8d, %edx ; X64-WIN-NEXT: jne .LBB4_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: # kill: def $ax killed $ax killed $eax @@ -192,17 +205,19 @@ define i8 @test6(i8 %a, i8 %b) nounwind { ; X86-LABEL: test6: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $1, %cl ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB5_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: xorb %cl, %al -; X86-NEXT: movl %eax, %edx -; X86-NEXT: notb %dl -; X86-NEXT: andb %cl, %dl -; X86-NEXT: addb %dl, %dl -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorb %dl, %al +; X86-NEXT: movb %al, %ah +; X86-NEXT: notb %ah +; X86-NEXT: andb %dl, %ah +; X86-NEXT: addb %ah, %ah +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movb %ah, %dl ; X86-NEXT: jne .LBB5_1 ; X86-NEXT: # %bb.2: # %bb12 ; X86-NEXT: retl @@ -210,15 +225,17 @@ ; X64-LIN-LABEL: test6: ; X64-LIN: # %bb.0: # %entry ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: movb $1, %cl ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB5_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-LIN-NEXT: xorb %sil, %al -; X64-LIN-NEXT: movl %eax, %ecx -; X64-LIN-NEXT: notb %cl -; X64-LIN-NEXT: andb %sil, %cl -; X64-LIN-NEXT: addb %cl, %cl -; X64-LIN-NEXT: movl %ecx, %esi +; X64-LIN-NEXT: movl %eax, %edx +; X64-LIN-NEXT: notb %dl +; X64-LIN-NEXT: andb %sil, %dl +; X64-LIN-NEXT: addb %dl, %dl +; X64-LIN-NEXT: testb %cl, %cl +; X64-LIN-NEXT: movl %edx, %esi ; X64-LIN-NEXT: jne .LBB5_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: # kill: def $al killed $al killed $eax @@ -227,15 +244,17 @@ ; X64-WIN-LABEL: test6: ; X64-WIN: # %bb.0: # %entry ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: movb $1, %cl ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB5_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-WIN-NEXT: xorb %dl, %al -; X64-WIN-NEXT: movl %eax, %ecx -; 
X64-WIN-NEXT: notb %cl -; X64-WIN-NEXT: andb %dl, %cl -; X64-WIN-NEXT: addb %cl, %cl -; X64-WIN-NEXT: movl %ecx, %edx +; X64-WIN-NEXT: movl %eax, %r8d +; X64-WIN-NEXT: notb %r8b +; X64-WIN-NEXT: andb %dl, %r8b +; X64-WIN-NEXT: addb %r8b, %r8b +; X64-WIN-NEXT: testb %cl, %cl +; X64-WIN-NEXT: movl %r8d, %edx ; X64-WIN-NEXT: jne .LBB5_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq @@ -257,33 +276,39 @@ define i32 @test7(i32 %a, i32 %b) nounwind { ; X86-LABEL: test7: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $1, %cl ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB6_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: xorl $2147483646, %edx # imm = 0x7FFFFFFE -; X86-NEXT: andl %ecx, %edx -; X86-NEXT: addl %edx, %edx -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $2147483646, %esi # imm = 0x7FFFFFFE +; X86-NEXT: andl %edx, %esi +; X86-NEXT: addl %esi, %esi +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jne .LBB6_1 ; X86-NEXT: # %bb.2: # %bb12 +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LIN-LABEL: test7: ; X64-LIN: # %bb.0: # %entry ; X64-LIN-NEXT: movl %edi, %eax +; X64-LIN-NEXT: movb $1, %cl ; X64-LIN-NEXT: .p2align 4, 0x90 ; X64-LIN-NEXT: .LBB6_1: # %bb ; X64-LIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-LIN-NEXT: xorl %esi, %eax -; X64-LIN-NEXT: movl %eax, %ecx -; X64-LIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE -; X64-LIN-NEXT: andl %esi, %ecx -; X64-LIN-NEXT: addl %ecx, %ecx -; X64-LIN-NEXT: movl %ecx, %esi +; X64-LIN-NEXT: movl %eax, %edx +; X64-LIN-NEXT: xorl $2147483646, %edx # imm = 0x7FFFFFFE +; X64-LIN-NEXT: andl %esi, %edx +; X64-LIN-NEXT: addl %edx, %edx +; X64-LIN-NEXT: testb %cl, %cl +; X64-LIN-NEXT: movl %edx, %esi ; X64-LIN-NEXT: jne 
.LBB6_1 ; X64-LIN-NEXT: # %bb.2: # %bb12 ; X64-LIN-NEXT: retq @@ -291,15 +316,17 @@ ; X64-WIN-LABEL: test7: ; X64-WIN: # %bb.0: # %entry ; X64-WIN-NEXT: movl %ecx, %eax +; X64-WIN-NEXT: movb $1, %cl ; X64-WIN-NEXT: .p2align 4, 0x90 ; X64-WIN-NEXT: .LBB6_1: # %bb ; X64-WIN-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-WIN-NEXT: xorl %edx, %eax -; X64-WIN-NEXT: movl %eax, %ecx -; X64-WIN-NEXT: xorl $2147483646, %ecx # imm = 0x7FFFFFFE -; X64-WIN-NEXT: andl %edx, %ecx -; X64-WIN-NEXT: addl %ecx, %ecx -; X64-WIN-NEXT: movl %ecx, %edx +; X64-WIN-NEXT: movl %eax, %r8d +; X64-WIN-NEXT: xorl $2147483646, %r8d # imm = 0x7FFFFFFE +; X64-WIN-NEXT: andl %edx, %r8d +; X64-WIN-NEXT: addl %r8d, %r8d +; X64-WIN-NEXT: testb %cl, %cl +; X64-WIN-NEXT: movl %r8d, %edx ; X64-WIN-NEXT: jne .LBB6_1 ; X64-WIN-NEXT: # %bb.2: # %bb12 ; X64-WIN-NEXT: retq