Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4638,8 +4638,10 @@
 }
 
 // Use the more efficient MOVI instead of DUP from ZR to zero up vectors
-def : Pat<(v2f32 (AArch64dup (f32 fpimm0))), (MOVIv2i32 (i32 0), (i32 0))>;
+def : Pat<(v1f64 (AArch64dup (f64 fpimm0))), (MOVID (i32 0))>;
+def : Pat<(v2f32 (AArch64dup (f32 fpimm0))), (MOVIv2i32 (i32 0), (i32 0))>;
+def : Pat<(v1i64 (AArch64dup (i32 0))), (MOVID (i32 0))>;
 def : Pat<(v2i32 (AArch64dup (i32 0))), (MOVIv2i32 (i32 0), (i32 0))>;
 def : Pat<(v4i16 (AArch64dup (i32 0))), (MOVIv4i16 (i32 0), (i32 0))>;
 def : Pat<(v8i8 (AArch64dup (i32 0))), (MOVIv8b_ns (i32 0))>;
 
@@ -4652,6 +4654,17 @@
 def : Pat<(v8i16 (AArch64dup (i32 0))), (MOVIv8i16 (i32 0), (i32 0))>;
 def : Pat<(v16i8 (AArch64dup (i32 0))), (MOVIv16b_ns (i32 0))>;
 
+// Use the more efficient MOVI instead of DUP from register to set vector masks
+def : Pat<(v1i64 (AArch64dup (i64 -1))), (MOVID (i32 -1))>;
+def : Pat<(v2i32 (AArch64dup (i32 -1))), (MOVIv2i32 (i32 -1), (i32 0))>;
+def : Pat<(v4i16 (AArch64dup (i32 0xffff))), (MOVIv4i16 (i32 -1), (i32 0))>;
+def : Pat<(v8i8 (AArch64dup (i32 0xff))), (MOVIv8b_ns (i32 -1))>;
+
+def : Pat<(v2i64 (AArch64dup (i64 -1))), (MOVIv2d_ns (i32 -1))>;
+def : Pat<(v4i32 (AArch64dup (i32 -1))), (MOVIv4i32 (i32 -1), (i32 0))>;
+def : Pat<(v8i16 (AArch64dup (i32 0xffff))), (MOVIv8i16 (i32 -1), (i32 0))>;
+def : Pat<(v16i8 (AArch64dup (i32 0xff))), (MOVIv16b_ns (i32 -1))>;
+
 // AdvSIMD MVNI
 
 // EDIT per word & halfword: 2s, 4h, 4s, & 8h
Index: llvm/test/CodeGen/AArch64/build-one-lane.ll
===================================================================
--- llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -3,7 +3,7 @@
 ; Check that building up a vector w/ only one non-zero lane initializes
 ; intelligently.
 
-define <8 x i8> @v8i8(i8 %t, i8 %s) nounwind {
+define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
   %v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %s, i32 7
   ret <8 x i8> %v
 
@@ -11,7 +11,7 @@
 ; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
 }
 
-define <16 x i8> @v16i8(i8 %t, i8 %s) nounwind {
+define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind {
   %v = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %s, i32 15
   ret <16 x i8> %v
 
@@ -19,7 +19,7 @@
 ; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
 }
 
-define <4 x i16> @v4i16(i16 %t, i16 %s) nounwind {
+define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
   %v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 0>, i16 %s, i32 3
   ret <4 x i16> %v
 
@@ -27,7 +27,7 @@
 ; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
 }
 
-define <8 x i16> @v8i16(i16 %t, i16 %s) nounwind {
+define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind {
   %v = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %s, i32 7
   ret <8 x i16> %v
 
@@ -35,7 +35,7 @@
 ; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
 }
 
-define <2 x i32> @v2i32(i32 %t, i32 %s) nounwind {
+define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
   %v = insertelement <2 x i32> <i32 0, i32 0>, i32 %s, i32 1
   ret <2 x i32> %v
 
@@ -43,7 +43,7 @@
 ; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
 }
 
-define <4 x i32> @v4i32(i32 %t, i32 %s) nounwind {
+define <4 x i32> @v4i32z(i32 %t, i32 %s) nounwind {
   %v = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %s, i32 3
   ret <4 x i32> %v
 
@@ -51,7 +51,7 @@
 ; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
 }
 
-define <2 x i64> @v2i64(i64 %t, i64 %s) nounwind {
+define <2 x i64> @v2i64z(i64 %t, i64 %s) nounwind {
   %v = insertelement <2 x i64> <i64 0, i64 0>, i64 %s, i32 1
   ret <2 x i64> %v
 
@@ -59,7 +59,7 @@
 ; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
 }
 
-define <2 x float> @v2f32(float %t, float %s) nounwind {
+define <2 x float> @v2f32z(float %t, float %s) nounwind {
   %v = insertelement <2 x float> <float 0.0, float 0.0>, float %s, i32 1
   ret <2 x float> %v
 
@@ -67,7 +67,7 @@
 ; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
 }
 
-define <4 x float> @v4f32(float %t, float %s) nounwind {
+define <4 x float> @v4f32z(float %t, float %s) nounwind {
   %v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, float %s, i32 3
   ret <4 x float> %v
 
@@ -75,10 +75,83 @@
 ; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
 }
 
-define <2 x double> @v2f64(double %t, double %s) nounwind {
+define <1 x double> @v1f64z(double %t, double %s) nounwind {
+  %v = insertelement <1 x double> <double undef>, double 0.0, i32 0
+  ret <1 x double> %v
+
+; CHECK: movi d{{[0-9]+}}, #0
+}
+
+define <2 x double> @v2f64z(double %t, double %s) nounwind {
   %v = insertelement <2 x double> <double 0.0, double 0.0>, double %s, i32 1
   ret <2 x double> %v
 
 ; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
 }
+
+; Check that building up a vector w/ only one non-all-ones lane initializes
+; intelligently.
+
+define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
+  %v = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %s, i32 7
+  ret <8 x i8> %v
+
+; CHECK: movi v[[R:[0-9]+]].8b, #-1
+; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
+}
+
+define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind {
+  %v = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %s, i32 15
+  ret <16 x i8> %v
+
+; CHECK: movi v[[R:[0-9]+]].16b, #-1
+; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
+}
+
+define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
+  %v = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, i16 %s, i32 3
+  ret <4 x i16> %v
+
+; CHECK: movi v[[R:[0-9]+]].4h, #-1
+; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
+}
+
+define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind {
+  %v = insertelement <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, i16 %s, i32 7
+  ret <8 x i16> %v
+
+; CHECK: movi v[[R:[0-9]+]].8h, #-1
+; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
+}
+
+define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
+  %v = insertelement <2 x i32> <i32 -1, i32 -1>, i32 %s, i32 1
+  ret <2 x i32> %v
+
+; CHECK: movi v[[R:[0-9]+]].2s, #-1
+; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
+}
+
+define <4 x i32> @v4i32m(i32 %t, i32 %s) nounwind {
+  %v = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %s, i32 3
+  ret <4 x i32> %v
+
+; CHECK: movi v[[R:[0-9]+]].4s, #-1
+; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
+}
+
+define <1 x i64> @v1i64m(i64 %t, i64 %s) nounwind {
+  %v = insertelement <1 x i64> <i64 undef>, i64 -1, i32 0
+  ret <1 x i64> %v
+
+; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
+}
+
+define <2 x i64> @v2i64m(i64 %t, i64 %s) nounwind {
+  %v = insertelement <2 x i64> <i64 -1, i64 -1>, i64 %s, i32 1
+  ret <2 x i64> %v
+
+; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
+; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
+}