diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12440,12 +12440,16 @@ // for each lane. if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) { // Firstly, try to materialize the splat constant. - SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue), - Val = ConstantBuildVector(Vec, DAG); - if (!Val) { - // Otherwise, materialize the constant and splat it. - Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); - DAG.ReplaceAllUsesWith(Vec.getNode(), &Val); + SDValue Val = DAG.getSplatBuildVector(VT, dl, ConstantValue); + unsigned BitSize = VT.getScalarSizeInBits(); + APInt ConstantValueAPInt(1, 0); + if (auto *C = dyn_cast<ConstantSDNode>(ConstantValue)) + ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize); + if (!isNullConstant(ConstantValue) && !ConstantValueAPInt.isAllOnes()) { + Val = ConstantBuildVector(Val, DAG); + if (!Val) + // Otherwise, materialize the constant and splat it. + Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); } // Now insert the non-constant lanes. 
diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll --- a/llvm/test/CodeGen/AArch64/build-one-lane.ll +++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll @@ -7,7 +7,7 @@ define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind { ; CHECK-LABEL: v8i8z: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.b[7], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -28,7 +28,7 @@ define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind { ; CHECK-LABEL: v4i16z: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.h[3], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -49,7 +49,7 @@ define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind { ; CHECK-LABEL: v2i32z: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.s[1], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -117,7 +117,7 @@ define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind { ; CHECK-LABEL: v8i8m: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0xffffffffffffffff +; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v0.b[7], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -138,7 +138,7 @@ define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind { ; CHECK-LABEL: v4i16m: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0xffffffffffffffff +; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v0.h[3], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -159,7 +159,7 @@ define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind { ; CHECK-LABEL: v2i32m: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0xffffffffffffffff +; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v0.s[1], w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -197,37 +197,37 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.h[1] -; CHECK-NEXT: smov w9, v0.h[2] -; CHECK-NEXT: mov w10, #30865 +; CHECK-NEXT: mov w9, #30865 +; CHECK-NEXT: movk w9, #51306, lsl #16 +; CHECK-NEXT: smov w10, v0.h[2] ; CHECK-NEXT: mov w11, #17097 -; CHECK-NEXT: movk w10, #51306, lsl #16 -; CHECK-NEXT: movk w11, #45590, lsl #16 ; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: smull x11, w9, w11 -; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: movk w11, #45590, lsl #16 +; CHECK-NEXT: smull x9, w8, w9 +; CHECK-NEXT: smull x11, w10, w11 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w9, w9, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w13, w10, #9 -; CHECK-NEXT: add w10, w13, w10, lsr #31 -; CHECK-NEXT: asr w13, w11, #4 -; CHECK-NEXT: add w11, w13, w11, lsr #31 -; CHECK-NEXT: smov w13, v0.h[3] -; CHECK-NEXT: msub w8, w10, w12, w8 -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: mov w12, #47143 -; CHECK-NEXT: mov w10, #23 -; CHECK-NEXT: movk w12, #24749, lsl #16 -; CHECK-NEXT: msub w9, w11, w10, w9 -; CHECK-NEXT: smull x10, w13, w12 +; CHECK-NEXT: asr w13, w9, #9 +; CHECK-NEXT: add w11, w11, w10 +; CHECK-NEXT: add w9, w13, w9, lsr #31 +; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: msub w8, w9, w12, w8 +; CHECK-NEXT: asr w9, w11, #4 +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: add w9, w9, w11, lsr #31 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov w11, #47143 +; CHECK-NEXT: movk w11, #24749, lsl #16 +; CHECK-NEXT: msub w9, w9, w13, w10 +; CHECK-NEXT: smull x10, w12, w11 ; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: lsr x8, x10, #63 ; CHECK-NEXT: asr x10, x10, 
#43 ; CHECK-NEXT: add w8, w10, w8 ; CHECK-NEXT: mov w10, #5423 ; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w13 +; CHECK-NEXT: msub w8, w8, w10, w12 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -240,25 +240,25 @@ ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] -; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: movk w9, #45590, lsl #16 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: mov w8, #17097 +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: smov w10, v0.h[1] ; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w11, #23 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: add w9, w13, w9, lsr #31 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: asr w13, w8, #4 +; CHECK-NEXT: add w8, w13, w8, lsr #31 ; CHECK-NEXT: negs w13, w10 ; CHECK-NEXT: and w10, w10, #0x7fff ; CHECK-NEXT: and w13, w13, #0x7fff ; CHECK-NEXT: csneg w10, w10, w13, mi ; CHECK-NEXT: mov w13, #47143 ; CHECK-NEXT: movk w13, #24749, lsl #16 -; CHECK-NEXT: msub w8, w9, w11, w8 +; CHECK-NEXT: msub w8, w8, w11, w9 ; CHECK-NEXT: smull x9, w12, w13 ; CHECK-NEXT: mov v1.h[1], w10 ; CHECK-NEXT: lsr x10, x9, #63 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -572,8 +572,8 @@ ; CHECK-LABEL: masked_gather_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v1.4h, 
v1.4h, #0.0 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -13,9 +13,9 @@ ; CHECK-LABEL: masked_load_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -534,8 +534,8 @@ ; CHECK-LABEL: masked_scatter_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: sshll v2.4s, v2.4h, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -13,9 +13,9 @@ ; CHECK-LABEL: masked_store_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h ; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 diff --git 
a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -158,12 +158,12 @@ ; CHECK-NEXT: mov w11, #25645 ; CHECK-NEXT: mov w12, #654 ; CHECK-NEXT: movk w11, #2849, lsl #16 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: umull x9, w8, w9 ; CHECK-NEXT: mov w13, #5560 -; CHECK-NEXT: umull x11, w10, w11 +; CHECK-NEXT: umull x9, w8, w9 ; CHECK-NEXT: movk w13, #12, lsl #16 +; CHECK-NEXT: umull x11, w10, w11 ; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: lsr x11, x11, #32 ; CHECK-NEXT: msub w8, w9, w12, w8 ; CHECK-NEXT: umov w9, v0.h[3] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll @@ -85,7 +85,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind { ; CHECK-LABEL: test_v3i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.h[0], w0 ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: mov v0.h[2], w2 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -99,7 +99,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind { ; CHECK-LABEL: test_v3i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.h[0], w0 ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: mov v0.h[2], w2