diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1801,8 +1801,13 @@ def : InstAlias<"mov $Zd, $imm", (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; - def : Pat<(nxv2i64 (splat_vector (i64 logical_imm64:$imm))), - (!cast(NAME) logical_imm64:$imm)>; + // NOTE: No pattern for nxv16i8 because DUP has full coverage. + def : Pat<(nxv8i16 (splat_vector (i32 (SVELogicalImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4i32 (splat_vector (i32 (SVELogicalImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll --- a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll +++ b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll @@ -50,9 +50,8 @@ define @ashr_add_shl_nxv4i8( %a) { ; CHECK-LABEL: ashr_add_shl_nxv4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16777216 +; CHECK-NEXT: mov z1.s, #0x1000000 ; CHECK-NEXT: lsl z0.s, z0.s, #24 -; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: asr z0.s, z0.s, #24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -40,22 +40,20 @@ ; CHECK-LABEL: test_signed_v4f32_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-822083584 -; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1325400063 -; CHECK-NEXT: mov z2.s, w9 -; CHECK-NEXT: mov w9, #2147483647 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z2.s, w9 -; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -67,28 +65,26 @@ ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-822083584 -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: mov w10, #1325400063 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.s +; CHECK-NEXT: mov w9, #1325400063 +; CHECK-NEXT: mov z3.s, #0x80000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s +; CHECK-NEXT: mov z6.s, #0x7fffffff ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z3.s, w9 -; CHECK-NEXT: mov z4.s, w10 +; CHECK-NEXT: mov z5.s, w9 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z5.s +; CHECK-NEXT: mov z4.s, p1/m, z3.s +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z5.s, p1/m, z3.s -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s -; CHECK-NEXT: mov z6.s, w8 ; CHECK-NEXT: movprfx z2, z1 ; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s -; CHECK-NEXT: sel z3.s, p2, z3.s, z2.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z4.s -; CHECK-NEXT: sel z2.s, p1, z6.s, z5.s -; CHECK-NEXT: mov z3.s, p2/m, z6.s +; CHECK-NEXT: sel z3.s, p1, z3.s, z2.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z5.s +; CHECK-NEXT: sel z2.s, p2, z6.s, z4.s +; CHECK-NEXT: mov z3.s, p1/m, z6.s ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s ; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 @@ -104,21 +100,20 @@ ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: mov w9, #65024 -; CHECK-NEXT: movk w9, #18175, lsl #16 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: mov w8, #65024 +; CHECK-NEXT: movk w8, #18175, lsl #16 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, w9 ; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #32767 // =0x7fff ; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -135,27 +130,26 @@ ; CHECK-NEXT: movk w9, #18175, lsl #16 ; CHECK-NEXT: movprfx z4, z1 ; CHECK-NEXT: fcvtzs z4.s, p0/m, z1.s +; CHECK-NEXT: mov z3.s, #32767 // =0x7fff ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: mov z5.s, w9 ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z3.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z5.s ; CHECK-NEXT: mov z4.s, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z5.s, w8 ; CHECK-NEXT: mov z2.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z3.s -; CHECK-NEXT: sel z3.s, p2, z5.s, z4.s -; CHECK-NEXT: mov z2.s, p1/m, z5.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z5.s +; CHECK-NEXT: mov z4.s, p2/m, z3.s +; CHECK-NEXT: mov z2.s, p1/m, z3.s ; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.h, z2.h, z3.h +; CHECK-NEXT: uzp1 z0.h, z2.h, z4.h ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) ret %x @@ -534,22 +528,20 @@ ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64511 -; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 -; CHECK-NEXT: mov z2.s, w9 -; CHECK-NEXT: mov w9, #2147483647 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z2.s, w9 -; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -561,32 +553,30 @@ ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64511 -; CHECK-NEXT: mov w9, #-2147483648 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z2.s, z0.h -; CHECK-NEXT: uunpkhi z6.s, z0.h -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z2.h +; CHECK-NEXT: uunpklo z3.s, z0.h +; CHECK-NEXT: mov w9, #31743 +; CHECK-NEXT: mov z2.s, #0x80000000 +; CHECK-NEXT: uunpkhi z5.s, z0.h ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #31743 -; CHECK-NEXT: mov z3.s, w9 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h -; CHECK-NEXT: fcmge p2.h, p0/z, z6.h, z1.h +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.s, p0/m, z3.h +; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, z1.h +; CHECK-NEXT: mov z4.h, w9 ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z4.s, p1/m, z3.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h -; CHECK-NEXT: mov z7.s, w9 -; CHECK-NEXT: movprfx z0, z6 -; CHECK-NEXT: fcvtzs z0.s, p0/m, z6.h -; CHECK-NEXT: sel z1.s, p2, z3.s, z0.s -; CHECK-NEXT: fcmgt p2.h, p0/z, z6.h, z5.h -; CHECK-NEXT: sel z0.s, p1, z7.s, z4.s -; CHECK-NEXT: mov z1.s, p2/m, z7.s -; CHECK-NEXT: fcmuo p1.h, p0/z, z2.h, z2.h -; CHECK-NEXT: fcmuo p0.h, p0/z, z6.h, z6.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z4.h +; CHECK-NEXT: mov z0.s, p1/m, z2.s +; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z5.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z6.s, #0x7fffffff +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z4.h +; CHECK-NEXT: mov z0.s, p2/m, z6.s +; CHECK-NEXT: mov z1.s, p1/m, z6.s +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h ; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret @@ -598,20 +588,19 @@ ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: mov w8, #30719 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w9 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z2.s, #32767 // =0x7fff ; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -623,20 +612,19 @@ ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 -; CHECK-NEXT: mov w9, #30719 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: mov w8, #30719 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w9 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: mov z1.h, p1/m, z2.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h ; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -81,13 +81,12 @@ ; CHECK-NEXT: mov w8, #65280 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: mov w9, #65535 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z1.s, #65535 // =0xffff ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.s, p2/m, z1.s ; CHECK-NEXT: ret @@ -108,13 +107,12 @@ ; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z2.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #65535 // =0xffff ; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: sel z2.s, p2, z0.s, z3.s ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s @@ -466,13 +464,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov w9, #65535 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z1.s, #65535 // =0xffff ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z0.s, p2/m, z1.s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -55,9 +55,8 @@ define @smax_i16_out_of_range( %a) { ; CHECK-LABEL: smax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -94,9 +93,8 @@ define @smax_i32_out_of_range( %a) { ; CHECK-LABEL: smax_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-129 +; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 -129, i32 0 @@ -198,9 +196,8 @@ define @smin_i16_out_of_range( %a) { ; CHECK-LABEL: smin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -237,9 +234,8 @@ define @smin_i32_out_of_range( %a) { ; CHECK-LABEL: smin_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-129 +; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 -129, i32 0 @@ -329,9 +325,8 @@ define @umax_i16_out_of_range( %a) { ; CHECK-LABEL: umax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -436,9 +431,8 @@ define @umin_i16_out_of_range( %a) { ; CHECK-LABEL: umin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -595,9 +589,8 @@ define @mul_i16_range( %a) { ; CHECK-LABEL: mul_i16_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.h, #255 // =0xff ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 255, i32 0 @@ -609,9 +602,8 @@ define @mul_i32_range( %a) { ; CHECK-LABEL: mul_i32_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov z1.s, #255 // =0xff ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 255, i32 0 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -34,8 +34,7 @@ define @add_i16_out_of_range( %a) { ; CHECK-LABEL: add_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -140,8 +139,7 @@ define @sub_i16_out_of_range( %a) { ; CHECK-LABEL: sub_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -296,8 +294,7 @@ define @subr_i16_out_of_range( %a) { ; CHECK-LABEL: subr_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: sub z0.h, z1.h, z0.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -483,9 +480,8 @@ define @smax_i32_out_of_range( %a) { ; CHECK-LABEL: smax_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-129 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -610,9 +606,8 @@ define @smin_i16_out_of_range( %a) { ; CHECK-LABEL: smin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-129 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -769,9 +764,8 @@ define @umax_i16_out_of_range( %a) { ; CHECK-LABEL: umax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -927,9 +921,8 @@ define @umin_i16_out_of_range( %a) { ; CHECK-LABEL: umin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll @@ -261,9 +261,8 @@ define @orr_i32_ptrue_all_d( %a) { ; CHECK-LABEL: orr_i32_ptrue_all_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #65535 // =0xffff ; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %pg.d = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -53,8 +53,8 @@ ret %splat } -define @sve_splat_8xi16_imm() { -; CHECK-LABEL: sve_splat_8xi16_imm: +define @sve_splat_8xi16_dup_imm() { +; CHECK-LABEL: sve_splat_8xi16_dup_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, #1 // =0x1 ; CHECK-NEXT: ret @@ -63,8 +63,18 @@ ret %splat } -define @sve_splat_4xi32_imm() { -; CHECK-LABEL: sve_splat_4xi32_imm: +define @sve_splat_8xi16_dupm_imm() { +; CHECK-LABEL: sve_splat_8xi16_dupm_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #16256 // =0x3f80 +; CHECK-NEXT: ret + %ins = insertelement undef, i16 16256, i32 0 ; 0x3f80 + %splat = shufflevector %ins, undef, zeroinitializer + ret %splat +} + +define @sve_splat_4xi32_dup_imm() { +; CHECK-LABEL: sve_splat_4xi32_dup_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.s, #1 // =0x1 ; CHECK-NEXT: ret @@ -73,6 +83,16 @@ ret %splat } +define @sve_splat_4xi32_dupm_imm() { +; CHECK-LABEL: sve_splat_4xi32_dupm_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0xff0000 +; CHECK-NEXT: ret + %ins = insertelement undef, i32 16711680, i32 0 ; 0xff0000 + %splat = shufflevector %ins, undef, zeroinitializer + ret %splat +} + define @sve_splat_2xi64_dup_imm() { ; CHECK-LABEL: sve_splat_2xi64_dup_imm: ; CHECK: // %bb.0: @@ -129,8 +149,7 @@ define @sve_splat_8xi8_imm() { ; CHECK-LABEL: sve_splat_8xi8_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: mov z0.h, #255 // =0xff ; CHECK-NEXT: ret %ins = insertelement undef, i8 -1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer @@ -161,8 +180,7 @@ define @sve_splat_4xi16_imm() { ; CHECK-LABEL: sve_splat_4xi16_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: mov z0.s, #65535 // =0xffff ; CHECK-NEXT: ret %ins = insertelement undef, i16 -1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer @@ -577,8 +595,8 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI55_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI55_0 +; CHECK-NEXT: adrp x8, .LCPI57_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI57_0 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -118,10 +118,9 @@ define @sel_16_illegal_wrong_extension( %p) { ; CHECK-LABEL: sel_16_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.h, #0 // =0x0 -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: mov z1.h, #128 // =0x80 +; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i16 128, i32 0), zeroinitializer, zeroinitializer %sel = select %p, %vec, zeroinitializer @@ -131,10 +130,9 @@ define @sel_32_illegal_wrong_extension( %p) { ; CHECK-LABEL: sel_32_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.s, #0 // =0x0 -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: mov z1.s, #128 // =0x80 +; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i32 128, i32 0), zeroinitializer, zeroinitializer %sel = select %p, %vec, zeroinitializer @@ -399,8 +397,7 @@ define @sel_merge_16_illegal_wrong_extension( %p, %in) { ; CHECK-LABEL: sel_merge_16_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #128 // =0x80 ; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i16 128, i32 0), zeroinitializer, zeroinitializer @@ -411,8 +408,7 @@ define @sel_merge_32_illegal_wrong_extension( %p, %in) { ; CHECK-LABEL: sel_merge_32_illegal_wrong_extension: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #128 // =0x80 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %vec = shufflevector insertelement ( undef, i32 128, i32 0), zeroinitializer, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll @@ -8,8 +8,7 @@ define @test_copysign_v2f32_v2f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %r = call @llvm.copysign.v2f32( %a, %b) @@ -19,10 +18,9 @@ define @test_copysign_v2f32_v2f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %tmp0 = fptrunc %b to @@ -37,8 +35,7 @@ define @test_copysign_v4f32_v4f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f32_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %r = call @llvm.copysign.v4f32( %a, %b) @@ -49,12 +46,11 @@ define @test_copysign_v4f32_v4f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcvt z2.s, p0/m, z2.d ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %tmp0 = fptrunc %b to @@ -130,8 +126,7 @@ define @test_copysign_v4f16_v4f16( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f16_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %r = call @llvm.copysign.v4f16( %a, %b) @@ -141,10 +136,9 @@ define @test_copysign_v4f16_v4f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f16_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %tmp0 = fptrunc %b to @@ -155,12 +149,11 @@ define @test_copysign_v4f16_v4f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f16_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcvt z2.h, p0/m, z2.d ; CHECK-NEXT: fcvt z1.h, p0/m, z1.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %tmp0 = fptrunc %b to @@ -175,8 +168,7 @@ define @test_copysign_v8f16_v8f16( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v8f16_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %r = call @llvm.copysign.v8f16( %a, %b) @@ -186,12 +178,11 @@ define @test_copysign_v8f16_v8f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v8f16_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcvt z2.h, p0/m, z2.s ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s ; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %tmp0 = fptrunc %b to diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll --- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll @@ -7,8 +7,7 @@ define @mul_i16_imm( %a) { ; CHECK-LABEL: mul_i16_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #255 // =0xff ; CHECK-NEXT: mul z0.h, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 255, i32 0 @@ -33,8 +32,7 @@ define @mul_i32_imm( %a) { ; CHECK-LABEL: mul_i32_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #255 // =0xff ; CHECK-NEXT: mul z0.s, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 255, i32 0