# Patch summary (free text ahead of the first "diff --git" header).
#
# Files touched:
#   llvm/lib/Target/AArch64/SVEInstrFormats.td
#   llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
#
# SVEInstrFormats.td:
#   * The old body of multiclass sve_int_dup_imm_pred_merge (four explicit
#     _B/_H/_S/_D defs under Constraints = "$Zd = $_Zd", plus four separate
#     "mov $Zd, $Pg/m, $imm" InstAliases) is removed.
#   * A new helper multiclass sve_int_dup_imm_pred_merge_inst is added. Per
#     element size it emits:
#       - the instruction def (NAME), under the same "$Zd = $_Zd" constraint;
#       - the "mov $Zd, $Pg/m, $imm" InstAlias;
#       - a Pat that matches
#           vselect predty:$Pg,
#                   (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift))),
#                   intty:$Zd
#         and selects the instruction with operands $Zd, $Pg, $imm, $shift.
#   * sve_int_dup_imm_pred_merge now instantiates the helper four times via
#     defm (_B, _H, _S, _D). scalarty is i32 for _B/_H/_S and i64 for _D.
#   * The "fmov $Zd, $Pg/m, #0.0" alias that follows is unchanged context.
#
# sve-vselect-imm.ll:
#   * The TODO comment above sel_16_illegal_wrong_extension is reworded.
#   * New sel_merge_* tests are appended:
#       - positive (#3), negative (#-128), zero (#0) for 8/16/32/64-bit;
#       - shifted (#512) for 16/32/64-bit; these expect a single predicated
#         "mov zN.<T>, p0/m, #imm";
#       - illegal_wrong_extension (128) and illegal_shifted (513) for
#         16/32/64-bit; these expect the fallback sequence: mov into w8,
#         broadcast into z1, then a predicated register mov into z0.
#
# NOTE(review): in this copy the line breaks are collapsed and angle-bracketed
# text appears to be missing (e.g. the argument list after
# "sve_int_dup_imm_pred", the type argument of "!cast", the parameter list of
# "multiclass sve_int_dup_imm_pred_merge", and the vector types in the IR
# "define"/"select"/"shufflevector" lines). TODO confirm against the upstream
# patch before attempting to apply it.
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4045,22 +4045,30 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_int_dup_imm_pred_merge { - let Constraints = "$Zd = $_Zd" in { - def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>; - def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>; - def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>; - def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>; - } - - def : InstAlias<"mov $Zd, $Pg/m, $imm", - (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $imm", - (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>; - def : InstAlias<"mov $Zd, $Pg/m, $imm", - (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>; +multiclass sve_int_dup_imm_pred_merge_inst< + bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, + ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { + let Constraints = "$Zd = $_Zd" in + def NAME : sve_int_dup_imm_pred; def : InstAlias<"mov $Zd, $Pg/m, $imm", - (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>; + (!cast(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; + def : Pat<(intty + (vselect predty:$Pg, + (intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))), + intty:$Zd)), + (!cast(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>; +} + +multiclass sve_int_dup_imm_pred_merge { + defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, + i32, cpy_imm8_opt_lsl_i8>; + defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, + i32, 
cpy_imm8_opt_lsl_i16>; + defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, + i32, cpy_imm8_opt_lsl_i32>; + defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, + i64, cpy_imm8_opt_lsl_i64>; def : InstAlias<"fmov $Zd, $Pg/m, #0.0", (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -111,8 +111,10 @@ ret %sel } -; TODO: We could actually use something like "sel z0.b, p0/z, #-128" if the -; odd bits of the predicate are zero. +; TODO: We could actually use something like "cpy z0.b, p0/z, #-128". But it's +; a little tricky to prove correctness: we're using the predicate with the +; wrong width, so we'd have to prove the bits which would normally be unused +; are actually zero. define @sel_16_illegal_wrong_extension( %p) { ; CHECK-LABEL: sel_16_illegal_wrong_extension: ; CHECK: // %bb.0: @@ -190,3 +192,225 @@ %sel = select %p, %vec, zeroinitializer ret %sel } + +define @sel_merge_8_positive( %p, %in) { +; CHECK-LABEL: sel_merge_8_positive: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, p0/m, #3 // =0x3 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i8 3, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_16_positive( %p, %in) { +; CHECK-LABEL: sel_merge_16_positive: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, p0/m, #3 // =0x3 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i16 3, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_32_positive( %p, %in) { +; CHECK-LABEL: sel_merge_32_positive: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/m, #3 // =0x3 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i32 3, i32 0), zeroinitializer, zeroinitializer +%sel = select 
%p, %vec, %in +ret %sel +} + +define @sel_merge_64_positive( %p, %in) { +; CHECK-LABEL: sel_merge_64_positive: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, p0/m, #3 // =0x3 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i64 3, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_8_negative( %p, %in) { +; CHECK-LABEL: sel_merge_8_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, p0/m, #-128 // =0xffffffffffffff80 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i8 -128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_16_negative( %p, %in) { +; CHECK-LABEL: sel_merge_16_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, p0/m, #-128 // =0xffffffffffffff80 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i16 -128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_32_negative( %p, %in) { +; CHECK-LABEL: sel_merge_32_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/m, #-128 // =0xffffffffffffff80 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i32 -128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_64_negative( %p, %in) { +; CHECK-LABEL: sel_merge_64_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, p0/m, #-128 // =0xffffffffffffff80 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i64 -128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_8_zero( %p, %in) { +; CHECK-LABEL: sel_merge_8_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0 +; CHECK-NEXT: ret +%sel = select %p, zeroinitializer, %in +ret %sel +} + +define @sel_merge_16_zero( %p, %in) { +; CHECK-LABEL: sel_merge_16_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 +; CHECK-NEXT: ret +%sel = select %p, 
zeroinitializer, %in +ret %sel +} + +define @sel_merge_32_zero( %p, %in) { +; CHECK-LABEL: sel_merge_32_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 +; CHECK-NEXT: ret +%sel = select %p, zeroinitializer, %in +ret %sel +} + +define @sel_merge_64_zero( %p, %in) { +; CHECK-LABEL: sel_merge_64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ret +%sel = select %p, zeroinitializer, %in +ret %sel +} + +define @sel_merge_16_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_16_shifted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, p0/m, #512 // =0x200 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i16 512, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_32_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_32_shifted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/m, #512 // =0x200 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i32 512, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_64_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_64_shifted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, p0/m, #512 // =0x200 +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i64 512, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +; TODO: We could actually use something like "cpy z0.b, p0/m, #-128". But it's +; a little tricky to prove correctness: we're using the predicate with the +; wrong width, so we'd have to prove the bits which would normally be unused +; are actually zero. 
+define @sel_merge_16_illegal_wrong_extension( %p, %in) { +; CHECK-LABEL: sel_merge_16_illegal_wrong_extension: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #128 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i16 128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_32_illegal_wrong_extension( %p, %in) { +; CHECK-LABEL: sel_merge_32_illegal_wrong_extension: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #128 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i32 128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_64_illegal_wrong_extension( %p, %in) { +; CHECK-LABEL: sel_merge_64_illegal_wrong_extension: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #128 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i64 128, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_16_illegal_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_16_illegal_shifted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #513 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i16 513, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_32_illegal_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_32_illegal_shifted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #513 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i32 513, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +} + +define @sel_merge_64_illegal_shifted( %p, %in) { +; CHECK-LABEL: sel_merge_64_illegal_shifted: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mov w8, #513 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: ret +%vec = shufflevector insertelement ( undef, i64 513, i32 0), zeroinitializer, zeroinitializer +%sel = select %p, %vec, %in +ret %sel +}