diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -457,6 +457,12 @@ defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>; + + let Predicates = [HasSVE, HasBF16] in { + def : SVE_3_Op_Pat; + def : SVE_3_Op_Pat; + } + defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; @@ -470,6 +476,10 @@ defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>; defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>; + let Predicates = [HasSVE, HasBF16] in { + def : SVE_1_Op_Pat; + } + defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>; @@ -1008,6 +1018,15 @@ defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>; defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>; + let Predicates = [HasSVE, HasBF16] in { + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + } + defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>; defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>; defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>; @@ -1985,6 +2004,15 @@ defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>; } +let Predicates = [HasSVE, HasMatMulFP64, HasBF16] in { + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; + def : SVE_2_Op_Pat; +} + let Predicates = [HasSVE2] in { // SVE2 integer multiply-add (indexed) defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1124,10 +1124,9 @@ def : SVE_1_Op_Pat(NAME # _S)>; def : SVE_1_Op_Pat(NAME # _D)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _H)>; - def : SVE_1_Op_Pat(NAME # _S)>; - def : SVE_1_Op_Pat(NAME # _D)>; + def : SVE_1_Op_Pat(NAME # _H)>; + def : SVE_1_Op_Pat(NAME # _S)>; + def : SVE_1_Op_Pat(NAME # _D)>; } class sve_int_perm_reverse_p sz8_64, string asm, PPRRegOp pprty> @@ -1321,11 +1320,10 @@ def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_3_Op_Pat(NAME # _D)>; def : InstAlias<"mov $Zd, $Pg/m, $Zn", (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; @@ -2214,11 +2212,10 @@ def : SVE_2_Op_Pat(NAME # _S)>; def : SVE_2_Op_Pat(NAME # _D)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -5809,10 +5806,9 @@ def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } class sve2_int_perm_splice_cons sz8_64, string asm, @@ -7771,7 +7767,6 @@ def : SVE_2_Op_Pat(NAME)>; def : SVE_2_Op_Pat(NAME)>; def : SVE_2_Op_Pat(NAME)>; - def : SVE_2_Op_Pat(NAME)>; def : SVE_2_Op_Pat(NAME)>; def : SVE_2_Op_Pat(NAME)>; def : SVE_2_Op_Pat(NAME)>; diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select-matmul-fp64.ll @@ -49,7 +49,7 @@ ret %out } -define @trn1_bf16( %a, %b) nounwind { +define @trn1_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: trn1_bf16: ; CHECK-NEXT: trn1 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -125,7 +125,7 @@ ret %out } -define @trn2_bf16( %a, %b) nounwind { +define @trn2_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: trn2_bf16: ; CHECK-NEXT: trn2 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -201,7 +201,7 @@ ret %out } -define @uzp1_bf16( %a, %b) nounwind { +define @uzp1_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: uzp1_bf16: ; CHECK-NEXT: uzp1 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -277,7 +277,7 @@ ret %out } -define @uzp2_bf16( %a, %b) nounwind { +define @uzp2_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: uzp2_bf16: ; CHECK-NEXT: uzp2 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -353,7 +353,7 @@ ret %out } -define @zip1_bf16( %a, %b) nounwind { +define @zip1_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: zip1_bf16: ; CHECK-NEXT: zip1 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -429,7 +429,7 @@ ret %out } -define @zip2_bf16( %a, %b) nounwind { +define @zip2_bf16( %a, %b) nounwind #0 { ; CHECK-LABEL: zip2_bf16: ; CHECK-NEXT: zip2 z0.q, z0.q, z1.q ; CHECK-NEXT: ret @@ -510,3 +510,6 @@ declare @llvm.aarch64.sve.zip2q.nxv8f16(, ) declare @llvm.aarch64.sve.zip2q.nxv8i16(, ) declare @llvm.aarch64.sve.zip2q.nxv16i8(, ) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+fp64mm,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -806,7 +806,7 @@ ret %res } -define @rev_bf16( %a) { +define @rev_bf16( %a) #0 { ; CHECK-LABEL: rev_bf16 ; CHECK: rev z0.h, z0.h ; CHECK-NEXT: ret @@ -882,7 +882,7 @@ ret %out } -define @splice_bf16( %pg, %a, %b) { +define @splice_bf16( %pg, %a, %b) #0 { ; CHECK-LABEL: splice_bf16: ; CHECK: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: ret @@ -1195,7 +1195,7 @@ ret %out } -define @trn1_bf16( %a, %b) { +define @trn1_bf16( %a, %b) #0 { ; CHECK-LABEL: trn1_bf16: ; CHECK: trn1 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -1316,7 +1316,7 @@ ret %out } -define @trn2_bf16( %a, %b) { +define @trn2_bf16( %a, %b) #0 { ; CHECK-LABEL: trn2_bf16: ; CHECK: trn2 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -1437,7 +1437,7 @@ ret %out } -define @uzp1_bf16( %a, %b) { +define @uzp1_bf16( %a, %b) #0 { ; CHECK-LABEL: uzp1_bf16: ; CHECK: uzp1 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -1558,7 +1558,7 @@ ret %out } -define @uzp2_bf16( %a, %b) { +define @uzp2_bf16( %a, %b) #0 { ; CHECK-LABEL: uzp2_bf16: ; CHECK: uzp2 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -1679,7 +1679,7 @@ ret %out } -define @zip1_bf16( %a, %b) { +define @zip1_bf16( %a, %b) #0 { ; CHECK-LABEL: zip1_bf16: ; CHECK: zip1 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -1800,7 +1800,7 @@ ret %out } -define @zip2_bf16( %a, %b) { +define @zip2_bf16( %a, %b) #0 { ; CHECK-LABEL: zip2_bf16: ; CHECK: zip2 z0.h, z0.h, z1.h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll @@ -57,7 +57,7 @@ ret %out } -define @sel_bf16( %pg, %a, %b) { +define @sel_bf16( %pg, %a, %b) #0 { ; CHECK-LABEL: sel_bf16: ; CHECK: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: ret @@ -106,3 +106,6 @@ declare @llvm.aarch64.sve.sel.nxv8f16(, , ) declare @llvm.aarch64.sve.sel.nxv4f32(, , ) declare @llvm.aarch64.sve.sel.nxv2f64(, , ) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" }