Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2645,9 +2645,9 @@
                           getApproximateEVTForLLT(ToTy, DL, Ctx));
   }
 
-  /// Return true if sign-extension from FromTy to ToTy is cheaper than
-  /// zero-extension.
-  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
+  /// Return true if sign-extension of value \p V from FromTy to ToTy is
+  /// cheaper than zero-extension, where \p V can be SDValue() if unknown.
+  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy, SDValue V) const {
     return false;
   }
 
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7004,7 +7004,7 @@
   // matching the argument extension instead.
   Instruction::CastOps ExtType = Instruction::ZExt;
   // Some targets prefer SExt over ZExt.
-  if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+  if (TLI->isSExtCheaperThanZExt(OldVT, RegType, SDValue()))
     ExtType = Instruction::SExt;
 
   if (auto *Arg = dyn_cast<Argument>(Cond)) {
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1704,7 +1704,7 @@
   SDValue OpL = GetPromotedInteger(LHS);
   SDValue OpR = GetPromotedInteger(RHS);
 
-  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
+  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType(), LHS)) {
     // The target would prefer to promote the comparison operand with sign
     // extension. Honor that unless the promoted values are already zero
     // extended.
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -283,7 +283,7 @@
     EVT OldVT = Op.getValueType();
     SDLoc DL(Op);
     Op = GetPromotedInteger(Op);
-    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType(), Op))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
                          DAG.getValueType(OldVT));
     return DAG.getZeroExtendInReg(Op, DL, OldVT);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4765,7 +4765,7 @@
                        C->isTargetOpcode(), C->isOpaque());
   case ISD::ANY_EXTEND:
     // Some targets like RISCV prefer to sign extend some types.
-    if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+    if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT, Operand))
       return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
     return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3844,7 +3844,7 @@
   } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
              !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
-                                    OpVT)) {
+                                    OpVT, N0.getOperand(1))) {
     EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
     unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
     EVT ExtDstTy = N0.getValueType();
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1138,6 +1138,14 @@
   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                               LLT Ty2) const override;
+
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override {
+    if (!V)
+      return false;
+    if (ConstantSDNode *C = isConstOrConstSplat(V))
+      return C->getAPIntValue().isNegative();
+    return false;
+  }
 };
 
 namespace AArch64 {
Index: llvm/lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -319,7 +319,7 @@
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
-  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override;
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool hasAndNotCompare(SDValue Y) const override;
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1188,7 +1188,8 @@
   return TargetLowering::isZExtFree(Val, VT2);
 }
 
-bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT,
+                                                SDValue V) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
Index: llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -14,18 +14,8 @@
   ret <8 x i1> %Shuff
 }
 
-; CHECK: lCPI1_0:
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 0 ; 0x0
 ; CHECK: test2
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
-; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
+; CHECK: movi d{{[0-9]+}}, #0x0000ff00000000
 define <8 x i1>@test2() {
 bb:
   %Shuff = shufflevector <8 x i1> zeroinitializer,
@@ -36,7 +26,7 @@
 }
 
 ; CHECK: test3
-; CHECK: movi.4s v{{[0-9]+}}, #1
+; CHECK: movi.2d v{{[0-9]+}}, #0x0000ff000000ff
 define <16 x i1> @test3(i1* %ptr, i32 %v) {
 bb:
   %Shuff = shufflevector <16 x i1> , <16 x i1> undef,
@@ -45,11 +35,13 @@
      i32 14, i32 0>
   ret <16 x i1> %Shuff
 }
+
+
 ; CHECK: lCPI3_0:
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 255 ; 0xff
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
 ; CHECK: .byte 0 ; 0x0
Index: llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
+++ llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
@@ -249,7 +249,7 @@
 ; CHECK: stlxr [[SUCCESS:w[0-9]+]], w2, [x0]
 ; CHECK: cbnz [[SUCCESS]], [[LOOP]]
 
-; CHECK: mov w1, #1
+; CHECK: mov w1, #-1
 ; CHECK: mov w0, [[OLD]]
 ; CHECK: ret
 
Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
===================================================================
--- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT:    stlxr w8, w2, [x0]
 ; CHECK-NEXT:    cbnz w8, LBB0_1
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #-1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB0_4: ; %cmpxchg.nostore
 ; CHECK-NEXT:    mov w0, wzr
@@ -64,7 +64,7 @@
 ; CHECK-NEXT:    stlxrb w9, w2, [x0]
 ; CHECK-NEXT:    cbnz w9, LBB1_1
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #-1
 ; CHECK-NEXT:    eor w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB1_4: ; %cmpxchg.nostore
@@ -188,7 +188,7 @@
 ; CHECK-NEXT:    stlxr w8, w20, [x19]
 ; CHECK-NEXT:    cbnz w8, LBB3_1
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #-1
 ; CHECK-NEXT:    b LBB3_5
 ; CHECK-NEXT:  LBB3_4: ; %cmpxchg.nostore
 ; CHECK-NEXT:    mov w8, wzr
Index: llvm/test/CodeGen/AArch64/dag-numsignbits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -8,18 +8,13 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov w1, wzr
+; CHECK-NEXT:    movi v2.4h, #1
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    mov w1, wzr
 ; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    adrp x8, .LCPI0_1
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    movi v1.4h, #1
-; CHECK-NEXT:    cmgt v0.4h, v1.4h, v0.4h
-; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT:    cmgt v0.4h, v2.4h, v0.4h
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    umov w3, v0.h[3]
 ; CHECK-NEXT:    b foo
Index: llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
+++ llvm/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs \
 ; RUN:     -aarch64-enable-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \
 ; RUN:     < %s | FileCheck %s
@@ -8,12 +9,12 @@
 define <2 x i32> @icmp_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: icmp_v2i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmeq.2s [[CMP:v[0-9]+]], v0, #0
-; CHECK-NEXT:  ; %bb.1:
-; CHECK-NEXT:    movi.2s [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:    and.8b v0, [[CMP]], [[MASK]]
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmeq.2s v0, v0, #0
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.2s v1, #1
+; CHECK-NEXT:    and.8b v0, v0, v1
+; CHECK-NEXT:    ret
   %c = icmp eq <2 x i32> %a, zeroinitializer
   br label %bb2
 
 bb2:
@@ -23,10 +24,12 @@
 define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: icmp_constfold_v2i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    movi.2s [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:    and.8b v0, [[MASK]], [[MASK]]
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.2s v1, #1
+; CHECK-NEXT:    and.8b v0, v0, v1
+; CHECK-NEXT:    ret
   %1 = icmp eq <2 x i32> %a, %a
   br label %bb2
 
 bb2:
@@ -36,14 +39,14 @@
 define <4 x i32> @icmp_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: icmp_v4i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmeq.4s [[CMP:v[0-9]+]], v0, #0
-; CHECK-NEXT:    xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]]
-; CHECK-NEXT:  ; %bb.1:
-; CHECK-NEXT:    movi.4h [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:    and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]]
-; CHECK-NEXT:    ushll.4s v0, [[ZEXT]], #0
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmeq.4s v0, v0, #0
+; CHECK-NEXT:    xtn.4h v0, v0
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.4h v1, #1
+; CHECK-NEXT:    and.8b v0, v0, v1
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    ret
   %c = icmp eq <4 x i32> %a, zeroinitializer
   br label %bb2
 
 bb2:
@@ -53,12 +56,13 @@
 define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: icmp_constfold_v4i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    movi.4h [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:  ; %bb.1:
-; CHECK-NEXT:    and.8b [[ZEXT:v[0-9]+]], [[MASK]], [[MASK]]
-; CHECK-NEXT:    ushll.4s v0, [[ZEXT]], #0
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.4h v1, #1
+; CHECK-NEXT:    and.8b v0, v0, v1
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    ret
   %1 = icmp eq <4 x i32> %a, %a
   br label %bb2
 
 bb2:
@@ -68,12 +72,12 @@
 define <16 x i8> @icmp_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: icmp_v16i8:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmeq.16b [[CMP:v[0-9]+]], v0, #0
-; CHECK-NEXT:  ; %bb.1:
-; CHECK-NEXT:    movi.16b [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:    and.16b v0, [[CMP]], [[MASK]]
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmeq.16b v0, v0, #0
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.16b v1, #1
+; CHECK-NEXT:    and.16b v0, v0, v1
+; CHECK-NEXT:    ret
   %c = icmp eq <16 x i8> %a, zeroinitializer
   br label %bb2
 
 bb2:
@@ -83,10 +87,12 @@
 define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: icmp_constfold_v16i8:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    movi.16b [[MASK:v[0-9]+]], #1
-; CHECK-NEXT:    and.16b v0, [[MASK]], [[MASK]]
-; CHECK-NEXT:    ret
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    movi.16b v1, #1
+; CHECK-NEXT:    and.16b v0, v0, v1
+; CHECK-NEXT:    ret
   %1 = icmp eq <16 x i8> %a, %a
   br label %bb2
 
 bb2:
Index: llvm/test/CodeGen/AArch64/funnel-shift.ll
===================================================================
--- llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -93,7 +93,7 @@
 define i7 @fshl_i7_const_fold() {
 ; CHECK-LABEL: fshl_i7_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #67
+; CHECK-NEXT:    mov w0, #-61
 ; CHECK-NEXT:    ret
   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
   ret i7 %f
@@ -102,7 +102,7 @@
 define i8 @fshl_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #128
+; CHECK-NEXT:    mov w0, #-128
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -164,7 +164,7 @@
 define i8 @fshl_i8_const_fold() {
 ; CHECK-LABEL: fshl_i8_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #128
+; CHECK-NEXT:    mov w0, #-128
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
   ret i8 %f
@@ -241,7 +241,7 @@
 define i8 @fshr_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #254
+; CHECK-NEXT:    mov w0, #-2
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -250,7 +250,7 @@
 define i8 @fshr_i8_const_fold_overshift_2() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #225
+; CHECK-NEXT:    mov w0, #-31
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
   ret i8 %f
@@ -259,7 +259,7 @@
 define i8 @fshr_i8_const_fold_overshift_3() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #255
+; CHECK-NEXT:    mov w0, #-1
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
   ret i8 %f
@@ -303,7 +303,7 @@
 define i8 @fshr_i8_const_fold() {
 ; CHECK-LABEL: fshr_i8_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #254
+; CHECK-NEXT:    mov w0, #-2
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
   ret i8 %f
Index: llvm/test/CodeGen/AArch64/reduce-and.ll
===================================================================
--- llvm/test/CodeGen/AArch64/reduce-and.ll
+++ llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -223,8 +223,7 @@
 ; CHECK-LABEL: test_redand_v3i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    and w0, w8, w2
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redand_v3i8:
Index: llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
===================================================================
--- llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
+++ llvm/test/CodeGen/AArch64/redundant-copy-elim-empty-mbb.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: foo:
 ; CHECK: tbz
-; CHECK: mov{{.*}}, #1
+; CHECK: mov{{.*}}, #-1
 ; CHECK: ret
 ; CHECK: bl bar
 ; CHECK: cbnz
Index: llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
===================================================================
--- llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
+++ llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -177,7 +177,7 @@
 ; CHECK-NEXT:    bl consume
 ; CHECK-NEXT:    b .LBB8_3
 ; CHECK-NEXT:  .LBB8_2:
-; CHECK-NEXT:    mov w19, #1
+; CHECK-NEXT:    mov w19, #-1
 ; CHECK-NEXT:  .LBB8_3: // %common.ret
 ; CHECK-NEXT:    and w0, w19, #0x1
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
Index: llvm/test/CodeGen/AArch64/sve-vector-splat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -119,8 +119,7 @@
 define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
 ; CHECK-LABEL: sve_splat_8xi8_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #255
-; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 8 x i8> %ins, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -151,8 +150,7 @@
 define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
 ; CHECK-LABEL: sve_splat_4xi16_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65535
-; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
   %splat = shufflevector <vscale x 4 x i16> %ins, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -173,8 +171,7 @@
 define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
 ; CHECK-LABEL: sve_splat_2xi32_imm:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: mov z0.d, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
   %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll
@@ -29,11 +29,7 @@
 define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0x0000ff000000ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <2 x i8> %x, %mask
   %notmask = xor <2 x i8> %mask,
@@ -61,11 +57,7 @@
 define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask,
@@ -77,11 +69,7 @@
 define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask,
@@ -93,11 +81,7 @@
 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
 ; CHECK-LABEL: out_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0x00ffff0000ffff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <2 x i16> %x, %mask
   %notmask = xor <2 x i16> %mask,
Index: llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -86,8 +86,7 @@
 ; CHECK-LABEL: test_v3i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    and w0, w8, w2
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
   ret i8 %b