diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2645,9 +2645,9 @@
                                getApproximateEVTForLLT(ToTy, DL, Ctx));
   }
 
-  /// Return true if sign-extension from FromTy to ToTy is cheaper than
-  /// zero-extension.
-  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
+  /// Return true if sign-extension of value \p V from FromTy to ToTy is
+  /// cheaper than zero-extension, where \p V can be SDValue() if unknown.
+  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy, SDValue V) const {
     return false;
   }
 
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7004,7 +7004,7 @@
   // matching the argument extension instead.
   Instruction::CastOps ExtType = Instruction::ZExt;
   // Some targets prefer SExt over ZExt.
-  if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+  if (TLI->isSExtCheaperThanZExt(OldVT, RegType, SDValue()))
     ExtType = Instruction::SExt;
 
   if (auto *Arg = dyn_cast<Argument>(Cond)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1704,7 +1704,7 @@
   SDValue OpL = GetPromotedInteger(LHS);
   SDValue OpR = GetPromotedInteger(RHS);
 
-  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
+  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType(), LHS)) {
     // The target would prefer to promote the comparison operand with sign
     // extension. Honor that unless the promoted values are already zero
     // extended.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -283,7 +283,7 @@
     EVT OldVT = Op.getValueType();
     SDLoc DL(Op);
     Op = GetPromotedInteger(Op);
-    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType(), Op))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
                          DAG.getValueType(OldVT));
     return DAG.getZeroExtendInReg(Op, DL, OldVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4767,7 +4767,7 @@
                        C->isTargetOpcode(), C->isOpaque());
   case ISD::ANY_EXTEND:
     // Some targets like RISCV prefer to sign extend some types.
-    if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+    if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT, Operand))
       return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
     return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3844,7 +3844,7 @@
   } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
              !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
-                                    OpVT)) {
+                                    OpVT, N0.getOperand(1))) {
     EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
     unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
     EVT ExtDstTy = N0.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1138,6 +1138,14 @@
   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                               LLT Ty2) const override;
+
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override {
+    if (!V)
+      return false;
+    if (ConstantSDNode *C = isConstOrConstSplat(V))
+      return C->getAPIntValue().isNegative();
+    return false;
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -326,7 +326,7 @@
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
-  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override;
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool hasAndNotCompare(SDValue Y) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1198,7 +1198,8 @@
   return TargetLowering::isZExtFree(Val, VT2);
 }
 
-bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT,
+                                                SDValue V) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -14,18 +14,8 @@
   ret <8 x i1> %Shuff
 }
 
-; CHECK: lCPI1_0:
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
 ; CHECK: test2
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
-; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
+; CHECK: movi d{{[0-9]+}}, #0x0000ff00000000
 define <8 x i1>@test2() {
 bb:
   %Shuff = shufflevector <8 x i1> zeroinitializer,
@@ -36,7 +26,7 @@
 }
 
 ; CHECK: test3
-; CHECK: movi.4s v{{[0-9]+}}, #1
+; CHECK: movi.2d v{{[0-9]+}}, #0x0000ff000000ff
 define <16 x i1> @test3(i1* %ptr, i32 %v) {
 bb:
   %Shuff = shufflevector <16 x i1> , <16 x i1> undef,
@@ -45,11 +35,13 @@
                          i32 14, i32 0>
   ret <16 x i1> %Shuff
 }
+
+
 ; CHECK: lCPI3_0:
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 255 ; 0xff
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
--- a/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
@@ -249,7 +249,7 @@
 ; CHECK: stlxr [[SUCCESS:w[0-9]+]], w2, [x0]
 ; CHECK: cbnz [[SUCCESS]], [[LOOP]]
 
-; CHECK: mov w1, #1
+; CHECK: mov w1, #-1
 ; CHECK: mov w0, [[OLD]]
 ; CHECK: ret
 
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT: stlxr w8, w2, [x0]
 ; CHECK-NEXT: cbnz w8, LBB0_1
 ; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #-1
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: LBB0_4: ; %cmpxchg.nostore
 ; CHECK-NEXT: mov w0, wzr
@@ -64,7 +64,7 @@
 ; CHECK-NEXT: stlxrb w9, w2, [x0]
 ; CHECK-NEXT: cbnz w9, LBB1_1
 ; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #-1
 ; CHECK-NEXT: eor w0, w8, #0x1
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: LBB1_4: ; %cmpxchg.nostore
@@ -188,7 +188,7 @@
 ; CHECK-NEXT: stlxr w8, w20, [x19]
 ; CHECK-NEXT: cbnz w8, LBB3_1
 ; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #-1
 ; CHECK-NEXT: b LBB3_5
 ; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore
 ; CHECK-NEXT: mov w8, wzr
diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
--- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -8,18 +8,13 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI0_0
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov w1, wzr
+; CHECK-NEXT: movi v2.4h, #1
 ; CHECK-NEXT: dup v0.4h, v0.h[0]
+; CHECK-NEXT: mov w1, wzr
 ; CHECK-NEXT: mov w2, wzr
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_1
 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h
 ; CHECK-NEXT: umov w0, v0.h[0]
 ; CHECK-NEXT: umov w3, v0.h[3]
 ; CHECK-NEXT: b foo
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -25,8 +25,10 @@
 define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: icmp_constfold_v2i32:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: movi.2s v0, #1
-; CHECK-NEXT: and.8b v0, v0, v0
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT: ; %bb.1: ; %bb2
+; CHECK-NEXT: movi.2s v1, #1
+; CHECK-NEXT: and.8b v0, v0, v1
 ; CHECK-NEXT: ret
   %1 = icmp eq <2 x i32> %a, %a
   br label %bb2
@@ -55,9 +57,10 @@
 define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: icmp_constfold_v4i32:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: movi.4h v0, #1
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
 ; CHECK-NEXT: ; %bb.1: ; %bb2
-; CHECK-NEXT: and.8b v0, v0, v0
+; CHECK-NEXT: movi.4h v1, #1
+; CHECK-NEXT: and.8b v0, v0, v1
 ; CHECK-NEXT: ushll.4s v0, v0, #0
 ; CHECK-NEXT: ret
   %1 = icmp eq <4 x i32> %a, %a
@@ -85,8 +88,10 @@
 define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: icmp_constfold_v16i8:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: movi.16b v0, #1
-; CHECK-NEXT: and.16b v0, v0, v0
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT: ; %bb.1: ; %bb2
+; CHECK-NEXT: movi.16b v1, #1
+; CHECK-NEXT: and.16b v0, v0, v1
 ; CHECK-NEXT: ret
   %1 = icmp eq <16 x i8> %a, %a
   br label %bb2
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -93,7 +93,7 @@
 define i7 @fshl_i7_const_fold() {
 ; CHECK-LABEL: fshl_i7_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #67
+; CHECK-NEXT: mov w0, #-61
 ; CHECK-NEXT: ret
   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
   ret i7 %f
@@ -102,7 +102,7 @@
 define i8 @fshl_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #128
+; CHECK-NEXT: mov w0, #-128
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -164,7 +164,7 @@
 define i8 @fshl_i8_const_fold() {
 ; CHECK-LABEL: fshl_i8_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #128
+; CHECK-NEXT: mov w0, #-128
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
   ret i8 %f
@@ -241,7 +241,7 @@
 define i8 @fshr_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #254
+; CHECK-NEXT: mov w0, #-2
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -250,7 +250,7 @@
 define i8 @fshr_i8_const_fold_overshift_2() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #225
+; CHECK-NEXT: mov w0, #-31
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
   ret i8 %f
@@ -259,7 +259,7 @@
 define i8 @fshr_i8_const_fold_overshift_3() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #255
+; CHECK-NEXT: mov w0, #-1
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
   ret i8 %f
@@ -303,7 +303,7 @@
 define i8 @fshr_i8_const_fold() {
 ; CHECK-LABEL: fshr_i8_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #254
+; CHECK-NEXT: mov w0, #-2
 ; CHECK-NEXT: ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
   ret i8 %f
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -223,8 +223,7 @@
 ; CHECK-LABEL: test_redand_v3i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: and w0, w8, #0xff
+; CHECK-NEXT: and w0, w8, w2
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v3i8:
diff --git a/llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll b/llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
--- a/llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
+++ b/llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: foo:
 ; CHECK: tbz
-; CHECK: mov{{.*}}, #1
+; CHECK: mov{{.*}}, #-1
 ; CHECK: ret
 ; CHECK: bl bar
 ; CHECK: cbnz
diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
--- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -177,7 +177,7 @@
 ; CHECK-NEXT: bl consume
 ; CHECK-NEXT: b .LBB8_3
 ; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: mov w19, #1
+; CHECK-NEXT: mov w19, #-1
 ; CHECK-NEXT: .LBB8_3: // %common.ret
 ; CHECK-NEXT: and w0, w19, #0x1
 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -119,8 +119,7 @@
 define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
 ; CHECK-LABEL: sve_splat_8xi8_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #255
-; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 8 x i8> %ins, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -151,8 +150,7 @@
 define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
 ; CHECK-LABEL: sve_splat_4xi16_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65535
-; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
   %splat = shufflevector <vscale x 4 x i16> %ins, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -173,8 +171,7 @@
 define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
 ; CHECK-LABEL: sve_splat_2xi32_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: mov z0.d, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
   %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
@@ -29,11 +29,7 @@
 define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v2i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi d3, #0x0000ff000000ff
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %mx = and <2 x i8> %x, %mask
   %notmask = xor <2 x i8> %mask,
@@ -61,11 +57,7 @@
 define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask,
@@ -77,11 +69,7 @@
 define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask,
@@ -93,11 +81,7 @@
 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
 ; CHECK-LABEL: out_v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi d3, #0x00ffff0000ffff
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %mx = and <2 x i16> %x, %mask
   %notmask = xor <2 x i16> %mask,
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -86,8 +86,7 @@
 ; CHECK-LABEL: test_v3i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: and w0, w8, #0xff
+; CHECK-NEXT: and w0, w8, w2
 ; CHECK-NEXT: ret
   %b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
   ret i8 %b