diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1292,8 +1292,8 @@
 }
 
 class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
-                         RegisterClass srcRegType>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Vm),
+                         FPRasZPROperand srcOpType>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcOpType:$Vm),
   asm, "\t$Zdn, $Vm",
   "",
   []>, Sched<[]> {
@@ -1310,16 +1310,31 @@
 }
 
 multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
-  def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
-  def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
-  def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
-  def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
-
-  def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
+  def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8asZPR>;
+  def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16asZPR>;
+  def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32asZPR>;
+  def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64asZPR>;
+
+  def : Pat<(nxv8f16 (op nxv8f16:$Zn, f16:$Vm)),
+            (!cast<Instruction>(NAME # _H) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, hsub))>;
+  def : Pat<(nxv4f32 (op nxv4f32:$Zn, f32:$Vm)),
+            (!cast<Instruction>(NAME # _S) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, ssub))>;
+  def : Pat<(nxv2f64 (op nxv2f64:$Zn, f64:$Vm)),
+            (!cast<Instruction>(NAME # _D) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, dsub))>;
+
+  def : Pat<(nxv8bf16 (op nxv8bf16:$Zn, bf16:$Vm)),
+            (!cast<Instruction>(NAME # _H) $Zn, (INSERT_SUBREG (IMPLICIT_DEF), $Vm, hsub))>;
+
+  // Keep integer insertions within the vector unit.
+  def : Pat<(nxv16i8 (op (nxv16i8 ZPR:$Zn), (i32 (vector_extract (nxv16i8 ZPR:$Vm), 0)))),
+            (!cast<Instruction>(NAME # _B) $Zn, ZPR:$Vm)>;
+  def : Pat<(nxv8i16 (op (nxv8i16 ZPR:$Zn), (i32 (vector_extract (nxv8i16 ZPR:$Vm), 0)))),
+            (!cast<Instruction>(NAME # _H) $Zn, ZPR:$Vm)>;
+  def : Pat<(nxv4i32 (op (nxv4i32 ZPR:$Zn), (i32 (vector_extract (nxv4i32 ZPR:$Vm), 0)))),
+            (!cast<Instruction>(NAME # _S) $Zn, ZPR:$Vm)>;
+  def : Pat<(nxv2i64 (op (nxv2i64 ZPR:$Zn), (i64 (vector_extract (nxv2i64 ZPR:$Vm), 0)))),
+            (!cast<Instruction>(NAME # _D) $Zn, ZPR:$Vm)>;
 
-  def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-insr.ll b/llvm/test/CodeGen/AArch64/sve-insr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insr.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+define <vscale x 16 x i8> @insr_zpr_only_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: insr_zpr_only_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.b, b1
+; CHECK-NEXT:    ret
+  %t0 = extractelement <vscale x 16 x i8> %b, i64 0
+  %t1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %t0)
+  ret <vscale x 16 x i8> %t1
+}
+
+define <vscale x 8 x i16> @insr_zpr_only_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: insr_zpr_only_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.h, h1
+; CHECK-NEXT:    ret
+  %t0 = extractelement <vscale x 8 x i16> %b, i64 0
+  %t1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %t0)
+  ret <vscale x 8 x i16> %t1
+}
+
+define <vscale x 4 x i32> @insr_zpr_only_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: insr_zpr_only_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.s, s1
+; CHECK-NEXT:    ret
+  %t0 = extractelement <vscale x 4 x i32> %b, i64 0
+  %t1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %t0)
+  ret <vscale x 4 x i32> %t1
+}
+
+define <vscale x 2 x i64> @insr_zpr_only_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: insr_zpr_only_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    insr z0.d, d1
+; CHECK-NEXT:    ret
+  %t0 = extractelement <vscale x 2 x i64> %b, i64 0
+  %t1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %t0)
+  ret <vscale x 2 x i64> %t1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
+
+attributes #0 = { "target-features"="+sve" }