diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12074,6 +12074,11 @@
   EVT VT = N->getValueType(0);
   EVT PtrTy = N->getOperand(3).getValueType();
 
+  if (VT == MVT::nxv8bf16)
+    assert(
+        static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16() &&
+        "Unsupported type (BF16)");
+
   EVT LoadVT = VT;
   if (VT.isFloatingPoint())
     LoadVT = VT.changeTypeToInteger();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1550,7 +1550,10 @@
   defm : pred_load;
   defm : pred_load;
   defm : pred_load;
-  defm : pred_load;
+
+  let Predicates = [HasBF16, HasSVE] in {
+    defm : pred_load;
+  }
 
   // 16-element contiguous loads
   defm : pred_load;
@@ -1737,7 +1740,10 @@
   defm : ld1;
   defm : ld1;
   defm : ld1;
-  defm : ld1;
+
+  let Predicates = [HasBF16, HasSVE] in {
+    defm : ld1;
+  }
 
   // 16-element contiguous loads
   defm : ld1;
@@ -1777,7 +1783,10 @@
   defm : ldnf1;
   defm : ldnf1;
   defm : ldnf1;
-  defm : ldnf1;
+
+  let Predicates = [HasBF16, HasSVE] in {
+    defm : ldnf1;
+  }
 
   // 16-element contiguous non-faulting loads
   defm : ldnf1;
@@ -1818,7 +1827,10 @@
   defm : ldff1;
   defm : ldff1;
   defm : ldff1;
-  defm : ldff1;
+
+  let Predicates = [HasBF16, HasSVE] in {
+    defm : ldff1;
+  }
 
   // 16-element contiguous first faulting loads
   defm : ldff1;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll
@@ -207,7 +207,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) {
+define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ld1h_bf16_inbound:
 ; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
 ; CHECK-NEXT: ret
@@ -311,3 +311,6 @@
 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll
@@ -95,7 +95,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) {
+define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) #0 {
 ; CHECK-LABEL: ld1h_bf16
 ; CHECK: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT: ret
@@ -225,3 +225,6 @@
 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll
@@ -87,7 +87,7 @@
   ret <vscale x 8 x half> %res
 }
 
-define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
+define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) #0 {
 ; CHECK-LABEL: ld1h_bf16:
 ; CHECK: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
@@ -208,3 +208,6 @@
 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll
@@ -206,7 +206,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ldff1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) {
+define <vscale x 8 x bfloat> @ldff1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ldff1h_bf16:
 ; CHECK: ldff1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
@@ -223,7 +223,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, bfloat* %a, i64 %offset) {
+define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, bfloat* %a, i64 %offset) #0 {
 ; CHECK-LABEL: ldff1h_bf16_reg:
 ; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT: ret
@@ -428,3 +428,6 @@
 declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll
@@ -140,7 +140,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) {
+define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ldnf1h_bf16:
 ; CHECK: ldnf1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
@@ -159,7 +159,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) {
+define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
 ; CHECK-LABEL: ldnf1h_bf16_inbound:
 ; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
 ; CHECK-NEXT: ret
@@ -473,3 +473,6 @@
 declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, i32*)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, i64*)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, double*)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -87,7 +87,7 @@
   ret <vscale x 8 x half> %load
 }
 
-define <vscale x 8 x bfloat> @masked_load_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x i1> %mask) nounwind {
+define <vscale x 8 x bfloat> @masked_load_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x i1> %mask) nounwind #0 {
 ; CHECK-LABEL: masked_load_nxv8bf16:
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
@@ -203,3 +203,6 @@
 declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)
 declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>)
 declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }