diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1304,10 +1304,9 @@ setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); } - setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -642,6 +642,10 @@ (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>; def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv4bf16 (AArch64dup (bf16 FPR16:$src))), + (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv2bf16 (AArch64dup (bf16 FPR16:$src))), + (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; // Duplicate +0.0 into all vector elements def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; @@ -651,6 +655,8 @@ def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>; def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv2bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; // Duplicate Int immediate into all vector elements def : Pat<(nxv16i8 (AArch64dup (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -276,6 +276,28 @@ ret %2 } +define @splat_nxv4bf16(bfloat %val) #0 { +; CHECK-LABEL: splat_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + %1 = insertelement undef, bfloat %val, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + +define @splat_nxv2bf16(bfloat %val) #0 { +; CHECK-LABEL: splat_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret + %1 = insertelement undef, bfloat %val, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + define @splat_nxv8f16(half %val) { ; CHECK-LABEL: splat_nxv8f16: ; CHECK: // %bb.0: @@ -342,20 +364,36 @@ ret %2 } -define @splat_nxv8f16_zero() { -; CHECK-LABEL: splat_nxv8f16_zero: +define @splat_nxv8bf16_zero() #0 { +; CHECK-LABEL: splat_nxv8bf16_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: ret - ret zeroinitializer + ret zeroinitializer } -define @splat_nxv8bf16_zero() #0 { -; CHECK-LABEL: splat_nxv8bf16_zero: +define @splat_nxv4bf16_zero() #0 { +; CHECK-LABEL: splat_nxv4bf16_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: ret - ret zeroinitializer + ret zeroinitializer +} + +define @splat_nxv2bf16_zero() #0 { +; CHECK-LABEL: splat_nxv2bf16_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret + ret zeroinitializer +} + +define @splat_nxv8f16_zero() { +; CHECK-LABEL: splat_nxv8f16_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret + ret zeroinitializer } define @splat_nxv4f16_zero() { @@ -539,9 +577,9 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI51_0 +; CHECK-NEXT: adrp x8, .LCPI55_0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, x8, :lo12:.LCPI51_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI55_0 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %1 = insertelement undef, double 3.33, i32 0