Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6244,6 +6244,11 @@
   return false;
 }
 
+static bool resolveBuildVector(SDNode *Op, APInt &CnstBits, APInt &UndefBits) {
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op);
+  return (resolveBuildVector(BVN, CnstBits, UndefBits));
+}
+
 // Try 64-bit splatted SIMD immediate.
 static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits) {
@@ -6339,8 +6344,8 @@
 
     if (LHS)
       Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
-                          DAG.getConstant(Value, dl, MVT::i32),
-                          DAG.getConstant(Shift, dl, MVT::i32));
+                        DAG.getConstant(Value, dl, MVT::i32),
+                        DAG.getConstant(Shift, dl, MVT::i32));
     else
       Mov = DAG.getNode(NewOp, dl, MovTy,
                         DAG.getConstant(Value, dl, MVT::i32),
@@ -6584,9 +6589,9 @@
     return Res;
   }
 
-  SDValue LHS = Op.getOperand(0);
   EVT VT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
 
   BuildVectorSDNode *BVN =
       dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
   if (!BVN) {
@@ -6607,14 +6612,12 @@
         (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
                                     DefBits, &LHS)))
       return NewOp;
-    else {
-      DefBits = UndefBits;
-      if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
-                                      DefBits, &LHS)) ||
-          (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
-                                      DefBits, &LHS)))
-        return NewOp;
-    }
+
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
+                                    UndefBits, &LHS)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
+                                    UndefBits, &LHS)))
+      return NewOp;
   }
 
   // We can always fall back to a non-immediate OR.
@@ -6645,25 +6648,12 @@
   return DAG.getBuildVector(VT, dl, Ops);
 }
 
-SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  SDLoc dl(Op);
+static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
-  Op = NormalizeBuildVector(Op, DAG);
-  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
 
   APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
-  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
-    // Certain magic vector constants (used to express things like NOT
-    // and NEG) are passed through unmodified. This allows codegen patterns
-    // for these operations to match. Special-purpose patterns will lower
-    // these immediates to MOVI if it proves necessary.
-    uint64_t DefVal = DefBits.zextOrTrunc(64).getZExtValue();
-    if (DefBits.getHiBits(64) == DefBits.getLoBits(64) &&
-        VT.isInteger() && (DefVal == 0 || DefVal == UINT64_MAX))
-      return Op;
-
+  if (resolveBuildVector(Op.getNode(), DefBits, UndefBits)) {
     SDValue NewOp;
     if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
         (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
@@ -6695,6 +6685,35 @@
       return NewOp;
   }
 
+  return SDValue();
+}
+
+SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Try to build a simple constant vector.
+  Op = NormalizeBuildVector(Op, DAG);
+  if (VT.isInteger()) {
+    // Certain vector constants, used to express things like logical NOT and
+    // arithmetic NEG, are passed through unmodified. This allows special
+    // patterns for these operations to match, which will lower these constants
+    // to whatever is proven necessary.
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+    if (BVN->isConstant())
+      if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
+        unsigned BitSize = VT.getVectorElementType().getSizeInBits();
+        APInt Val(BitSize,
+                  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
+        if (Val.isNullValue() || Val.isAllOnesValue())
+          return Op;
+      }
+  }
+
+  if (SDValue V = ConstantBuildVector(Op, DAG))
+    return V;
+
   // Scan through the operands to find some interesting properties we can
   // exploit:
   //   1) If only one value is used, we can use a DUP, or
@@ -6706,7 +6725,7 @@
   //      select the values we'll be overwriting for the non-constant
   //      lanes such that we can directly materialize the vector
   //      some other way (MOVI, e.g.), we can be sneaky.
-  unsigned NumElts = VT.getVectorNumElements();
+  SDLoc dl(Op);
   bool isOnlyLowElement = true;
   bool usesOnlyOneValue = true;
   bool usesOnlyOneConstantValue = true;
@@ -6799,16 +6818,23 @@
   // is better than the default, which will perform a separate initialization
   // for each lane.
   if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
-    SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
-
+    // Firstly, try to materialize the splat constant.
+    SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
+            Val = ConstantBuildVector(Vec, DAG);
+    if (!Val) {
+      // Otherwise, materialize the constant and splat it.
+      Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
+      DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
+    }
+
+    // Now insert the non-constant lanes.
     for (unsigned i = 0; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
-      if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
+      if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V))
        // Note that type legalization likely mucked about with the VT of the
        // source operand, so we may have to convert it here before inserting.
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
-      }
     }
     return Val;
   }
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4644,21 +4644,6 @@
               [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
 }
 
-// Use the more efficient MOVI instead of DUP from ZR to zero up vectors
-def : Pat<(v2f32 (AArch64dup (f32 fpimm0))), (MOVIv2i32 (i32 0), (i32 0))>;
-
-def : Pat<(v2i32 (AArch64dup (i32 0))), (MOVIv2i32 (i32 0), (i32 0))>;
-def : Pat<(v4i16 (AArch64dup (i32 0))), (MOVIv4i16 (i32 0), (i32 0))>;
-def : Pat<(v8i8 (AArch64dup (i32 0))), (MOVIv8b_ns (i32 0))>;
-
-def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
-def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv4i32 (i32 0), (i32 0))>;
-
-def : Pat<(v2i64 (AArch64dup (i64 0))), (MOVIv2d_ns (i32 0))>;
-def : Pat<(v4i32 (AArch64dup (i32 0))), (MOVIv4i32 (i32 0), (i32 0))>;
-def : Pat<(v8i16 (AArch64dup (i32 0))), (MOVIv8i16 (i32 0), (i32 0))>;
-def : Pat<(v16i8 (AArch64dup (i32 0))), (MOVIv16b_ns (i32 0))>;
-
 // AdvSIMD MVNI
 
 // EDIT per word & halfword: 2s, 4h, 4s, & 8h
Index: llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
+++ llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -8,7 +8,7 @@
   ret void
 
   ; CHECK-LABEL: test0f
-  ; CHECK: movi.4s v[[TEMP:[0-9]+]], #0
+  ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0
   ; CHECK: mov.s v[[TEMP]][0], v{{[0-9]+}}[0]
   ; CHECK: str q[[TEMP]], [x0]
   ; CHECK: ret
@@ -24,9 +24,8 @@
   ret void
 
   ; CHECK-LABEL: test1f
-  ; CHECK: fmov s[[TEMP:[0-9]+]], #1.0000000
-  ; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0]
-  ; CHECK: mov.s v[[TEMP2]][0], v0[0]
-  ; CHECK: str q[[TEMP2]], [x0]
+  ; CHECK: fmov.4s v[[TEMP:[0-9]+]], #1.0
+  ; CHECK: mov.s v[[TEMP]][0], v0[0]
+  ; CHECK: str q[[TEMP]], [x0]
   ; CHECK: ret
 }
Index: llvm/test/CodeGen/AArch64/build-one-lane.ll
===================================================================
--- llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -1,174 +1,272 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
 
 ; Check that building up a vector w/ only one non-zero lane initializes
-; intelligently.
+; efficiently.
 
-define <8 x i8> @v8i8(i8 %t, i8 %s) nounwind {
+define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
   %v = insertelement <8 x i8> , i8 %s, i32 7
   ret <8 x i8> %v
 
-; CHECK-LABEL: v8i8:
-; CHECK: movi v[[R:[0-9]+]].8b, #0
+; CHECK-LABEL: v8i8z
+; CHECK: movi d[[R:[0-9]+]], #0
 ; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
 }
 
-define <16 x i8> @v16i8(i8 %t, i8 %s) nounwind {
+define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind {
   %v = insertelement <16 x i8> , i8 %s, i32 15
   ret <16 x i8> %v
 
-; CHECK-LABEL: v16i8:
-; CHECK: movi v[[R:[0-9]+]].16b, #0
+; CHECK-LABEL: v16i8z:
+; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
 }
 
-define <4 x i16> @v4i16(i16 %t, i16 %s) nounwind {
+define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
   %v = insertelement <4 x i16> , i16 %s, i32 3
   ret <4 x i16> %v
 
-; CHECK-LABEL: v4i16:
-; CHECK: movi v[[R:[0-9]+]].4h, #0
+; CHECK-LABEL: v4i16z:
+; CHECK: movi d[[R:[0-9]+]], #0
 ; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
 }
 
-define <8 x i16> @v8i16(i16 %t, i16 %s) nounwind {
+define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind {
   %v = insertelement <8 x i16> , i16 %s, i32 7
   ret <8 x i16> %v
 
-; CHECK-LABEL: v8i16:
-; CHECK: movi v[[R:[0-9]+]].8h, #0
+; CHECK-LABEL: v8i16z:
+; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
 }
 
-define <2 x i32> @v2i32(i32 %t, i32 %s) nounwind {
+define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
   %v = insertelement <2 x i32> , i32 %s, i32 1
   ret <2 x i32> %v
 
-; CHECK-LABEL: v2i32:
-; CHECK: movi v[[R:[0-9]+]].2s, #0
+; CHECK-LABEL: v2i32z:
+; CHECK: movi d[[R:[0-9]+]], #0
 ; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
 }
 
-define <4 x i32> @v4i32(i32 %t, i32 %s) nounwind {
+define <4 x i32> @v4i32z(i32 %t, i32 %s) nounwind {
   %v = insertelement <4 x i32> , i32 %s, i32 3
   ret <4 x i32> %v
 
-; CHECK-LABEL: v4i32:
-; CHECK: movi v[[R:[0-9]+]].4s, #0
+; CHECK-LABEL: v4i32z:
+; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
 }
 
-define <2 x i64> @v2i64(i64 %t, i64 %s) nounwind {
+define <2 x i64> @v2i64z(i64 %t, i64 %s) nounwind {
   %v = insertelement <2 x i64> , i64 %s, i32 1
   ret <2 x i64> %v
 
-; CHECK-LABEL: v2i64:
+; CHECK-LABEL: v2i64z:
 ; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
 }
 
-define <2 x float> @v2f32(float %t, float %s) nounwind {
+define <2 x float> @v2f32z(float %t, float %s) nounwind {
   %v = insertelement <2 x float> , float %s, i32 1
   ret <2 x float> %v
 
-; CHECK-LABEL: v2f32:
-; CHECK: movi v[[R:[0-9]+]].2s, #0
+; CHECK-LABEL: v2f32z:
+; CHECK: movi d[[R:[0-9]+]], #0
 ; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
 }
 
-define <4 x float> @v4f32(float %t, float %s) nounwind {
+define <4 x float> @v4f32z(float %t, float %s) nounwind {
   %v = insertelement <4 x float> , float %s, i32 3
   ret <4 x float> %v
 
-; CHECK-LABEL: v4f32:
-; CHECK: movi v[[R:[0-9]+]].4s, #0
+; CHECK-LABEL: v4f32z:
+; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
 }
 
-define <2 x double> @v2f64(double %t, double %s) nounwind {
+define <2 x double> @v2f64z(double %t, double %s) nounwind {
   %v = insertelement <2 x double> , double %s, i32 1
   ret <2 x double> %v
 
-; CHECK-LABEL: v2f64:
+; CHECK-LABEL: v2f64z:
 ; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
 }
 
-define void @v8i8st(<8 x i8>* %p, <8 x i8> %s) nounwind {
-  store <8 x i8> , <8 x i8>* %p, align 8
+; Check that building up a vector w/ only one non-ones lane initializes
+; efficiently.
+
+define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
+  %v = insertelement <8 x i8> , i8 %s, i32 7
+  ret <8 x i8> %v
+
+; CHECK-LABEL: v8i8m
+; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
+; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
+}
+
+define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind {
+  %v = insertelement <16 x i8> , i8 %s, i32 15
+  ret <16 x i8> %v
+
+; CHECK-LABEL: v16i8m
+; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
+; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
+}
+
+define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
+  %v = insertelement <4 x i16> , i16 %s, i32 3
+  ret <4 x i16> %v
+
+; CHECK-LABEL: v4i16m
+; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
+; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
+}
+
+define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind {
+  %v = insertelement <8 x i16> , i16 %s, i32 7
+  ret <8 x i16> %v
+
+; CHECK-LABEL: v8i16m
+; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
+; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
+}
+
+define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
+  %v = insertelement <2 x i32> , i32 %s, i32 1
+  ret <2 x i32> %v
+
+; CHECK-LABEL: v2i32m
+; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
+; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
+}
+
+define <4 x i32> @v4i32m(i32 %t, i32 %s) nounwind {
+  %v = insertelement <4 x i32> , i32 %s, i32 3
+  ret <4 x i32> %v
+
+; CHECK-LABEL: v4i32m
+; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
+; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
+}
+
+define <2 x i64> @v2i64m(i64 %t, i64 %s) nounwind {
+  %v = insertelement <2 x i64> , i64 %s, i32 1
+  ret <2 x i64> %v
+
+; CHECK-LABEL: v2i64m
+; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
+; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
+}
+
+; Check that building up a vector w/ some constants initializes efficiently.
+
+define void @v8i8st(<8 x i8>* %p, i8 %s) nounwind {
+  %v = insertelement <8 x i8> , i8 %s, i32 7
+  store <8 x i8> %v, <8 x i8>* %p, align 8
   ret void
 
 ; CHECK-LABEL: v8i8st:
-; CHECK: movi v[[R:[0-9]+]].8b, #64
+; CHECK: movi v[[R:[0-9]+]].8b, #1
+; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
+; CHECK: str d[[R]], [x{{[0-9]+}}]
 }
 
-define void @v16i8st(<16 x i8>* %p, <16 x i8> %s) nounwind {
-  store <16 x i8> , <16 x i8>* %p, align 16
+define void @v16i8st(<16 x i8>* %p, i8 %s) nounwind {
+  %v = insertelement <16 x i8> , i8 %s, i32 15
+  store <16 x i8> %v, <16 x i8>* %p, align 16
   ret void
 
 ; CHECK-LABEL: v16i8st:
-; CHECK: movi v[[R:[0-9]+]].16b, #64
+; CHECK: movi v[[R:[0-9]+]].16b, #128
+; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
 
-define void @v4i16st(<4 x i16>* %p, <4 x i16> %s) nounwind {
-  store <4 x i16> , <4 x i16>* %p, align 8
+define void @v4i16st(<4 x i16>* %p, i16 %s) nounwind {
+  %v = insertelement <4 x i16> , i16 %s, i32 3
+  store <4 x i16> %v, <4 x i16>* %p, align 8
   ret void
 
 ; CHECK-LABEL: v4i16st:
-; CHECK: movi v[[R:[0-9]+]].4h, #64, lsl #8
+; CHECK: movi v[[R:[0-9]+]].4h, #85, lsl #8
+; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
+; CHECK: str d[[R]], [x{{[0-9]+}}]
 }
 
-define void @v8i16st(<8 x i16>* %p, <8 x i16> %s) nounwind {
-  store <8 x i16> , <8 x i16>* %p, align 16
+define void @v8i16st(<8 x i16>* %p, i16 %s) nounwind {
+  %v = insertelement <8 x i16> , i16 %s, i32 7
+  store <8 x i16> %v, <8 x i16>* %p, align 16
   ret void
 
 ; CHECK-LABEL: v8i16st:
-; CHECK: movi v[[R:[0-9]+]].8h, #64, lsl #8
+; CHECK: mvni v[[R:[0-9]+]].8h, #85, lsl #8
+; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
 
-define void @v2i32st(<2 x i32>* %p, <2 x i32> %s) nounwind {
-  store <2 x i32> , <2 x i32>* %p, align 8
+define void @v2i32st(<2 x i32>* %p, i32 %s) nounwind {
+  %v = insertelement <2 x i32> , i32 %s, i32 1
+  store <2 x i32> %v, <2 x i32>* %p, align 8
   ret void
 
 ; CHECK-LABEL: v2i32st:
-; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
+; CHECK: movi v[[R:[0-9]+]].2s, #15, lsl #16
+; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
+; CHECK: str d[[R]], [x{{[0-9]+}}]
 }
 
-define void @v4i32st(<4 x i32>* %p, <4 x i32> %s) nounwind {
-  store <4 x i32> , <4 x i32>* %p, align 16
+define void @v4i32st(<4 x i32>* %p, i32 %s) nounwind {
+  %v = insertelement <4 x i32> , i32 %s, i32 3
+  store <4 x i32> %v, <4 x i32>* %p, align 16
  ret void
 
 ; CHECK-LABEL: v4i32st:
-; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
+; CHECK: movi v[[R:[0-9]+]].4s, #248, msl #16
+; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
 
-define void @v2i64st(<2 x i64>* %p, <2 x i64> %s) nounwind {
-  store <2 x i64> , <2 x i64>* %p, align 16
+define void @v2i64st(<2 x i64>* %p, i64 %s) nounwind {
+  %v = insertelement <2 x i64> , i64 %s, i32 1
+  store <2 x i64> %v, <2 x i64>* %p, align 16
   ret void
 
-; CHECK-LABEL: v2i64st
-; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
+; CHECK-LABEL: v2i64st:
+; CHECK: fmov v[[R:[0-9]+]].2d, #-2.0
+; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
 
-define void @v2f32st(<2 x float>* %p, <2 x float> %s) nounwind {
-  store <2 x float> , <2 x float>* %p, align 8
+define void @v2f32st(<2 x float>* %p, float %s) nounwind {
+  %v = insertelement <2 x float> , float %s, i32 1
+  store <2 x float> %v, <2 x float>* %p, align 8
   ret void
 
-; CHECK-LABEL: v2f32st
+; CHECK-LABEL: v2f32st:
 ; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
+; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
+; CHECK: str d[[R]], [x{{[0-9]+}}]
 }
 
-define void @v4f32st(<4 x float>* %p, <4 x float> %s) nounwind {
-  store <4 x float> , <4 x float>* %p, align 16
+define void @v4f32st(<4 x float>* %p, float %s) nounwind {
+  %v = insertelement <4 x float> , float %s, i32 3
+  store <4 x float> %v, <4 x float>* %p, align 16
   ret void
 
 ; CHECK-LABEL: v4f32st:
-; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
+; CHECK: movi v[[R:[0-9]+]].4s, #192, lsl #24
+; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
 
-define void @v2f64st(<2 x double>* %p, <2 x double> %s) nounwind {
-  store <2 x double> , <2 x double>* %p, align 16
+define void @v2f64st(<2 x double>* %p, double %s) nounwind {
+  %v = insertelement <2 x double> , double %s, i32 1
+  store <2 x double> %v, <2 x double>* %p, align 16
   ret void
 
 ; CHECK-LABEL: v2f64st:
 ; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
+; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
+; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
Index: llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -537,8 +537,8 @@
 define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: bsl1xi64_const:
 ; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = and <1 x i64> %a, < i64 -16 >
-  %tmp2 = and <1 x i64> %b, < i64 15 >
+  %tmp1 = and <1 x i64> %a, < i64 -256 >
+  %tmp2 = and <1 x i64> %b, < i64 255 >
   %tmp3 = or <1 x i64> %tmp1, %tmp2
   ret <1 x i64> %tmp3
 }