diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2000,25 +2000,27 @@ } // 2-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; // 4-element contiguous loads - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; - defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; + defm : pred_load; // 8-element contiguous loads defm : pred_load; @@ -2045,20 +2047,22 @@ } // 2-element contiguous stores - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; // 4-element contiguous stores - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; - defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; + defm : pred_store; // 8-element contiguous stores defm : pred_store; @@ -2099,23 +2103,25 @@ (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; } - defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; - defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; - defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store; - defm : unpred_store; - defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_store; - defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; - defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; - defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; - defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; - defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; + defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; + defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; + defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; + defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; + defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; + defm : unpred_store; + defm : unpred_store; + defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; + defm : unpred_store; + defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; + defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv4bf16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv2bf16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; + defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; + defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; + defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; multiclass unpred_load; defm : unpred_load< load, nxv8bf16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; defm : unpred_load< load, nxv4f16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; + defm : unpred_load< load, nxv4bf16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; defm : unpred_load< load, nxv2f16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; + defm : unpred_load< load, nxv2bf16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; defm : unpred_load< load, nxv4f32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll @@ -231,6 +231,18 @@ ret %val } +define @ld1_nxv4bf16(bfloat* %addr, i64 %off) { +; CHECK-LABEL: ld1_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %ptr = getelementptr inbounds bfloat, bfloat* %addr, i64 %off + %ptrcast = bitcast bfloat* %ptr to * + %val = load volatile , * %ptrcast + ret %val +} + define @ld1_nxv2f16(half* %addr, i64 %off) { ; CHECK-LABEL: ld1_nxv2f16: ; CHECK: // %bb.0: @@ -243,6 +255,18 @@ ret %val } +define @ld1_nxv2bf16(bfloat* %addr, i64 %off) { +; CHECK-LABEL: ld1_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %ptr = getelementptr inbounds bfloat, bfloat* %addr, i64 %off + %ptrcast = bitcast bfloat* %ptr to * + %val = load volatile , * %ptrcast + ret %val +} + ; LD1W define @ld1_nxv4i32(i32* %addr, i64 %off) { diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -60,6 +60,14 @@ ret %load } +define @masked_load_nxv2bf16( *%a, %mask) nounwind #0 { +; CHECK-LABEL: masked_load_nxv2bf16: +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv2bf16( *%a, i32 2, %mask, undef) + ret %load +} + define @masked_load_nxv4f32( *%a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv4f32: ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] @@ -76,6 +84,14 @@ ret %load } +define @masked_load_nxv4bf16( *%a, %mask) nounwind #0 { +; CHECK-LABEL: masked_load_nxv4bf16: +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv4bf16( *%a, i32 2, %mask, undef) + ret %load +} + define @masked_load_nxv8f16( *%a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv8f16: ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] @@ -185,6 +201,22 @@ ret void } +define void @masked_store_nxv2bf16( *%a, %val, %mask) nounwind #0 { +; CHECK-LABEL: masked_store_nxv2bf16: +; CHECK-NEXT: st1h { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2bf16( %val, *%a, i32 2, %mask) + ret void +} + +define void @masked_store_nxv4bf16( *%a, %val, %mask) nounwind #0 { +; CHECK-LABEL: masked_store_nxv4bf16: +; CHECK-NEXT: st1h { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4bf16( %val, *%a, i32 2, %mask) + ret void +} + define void @masked_store_nxv8bf16( *%a, %val, %mask) nounwind #0 { ; CHECK-LABEL: masked_store_nxv8bf16: ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -292,6 +324,8 @@ declare @llvm.masked.load.nxv4f32(*, i32, , ) declare @llvm.masked.load.nxv4f16(*, i32, , ) declare @llvm.masked.load.nxv8f16(*, i32, , ) +declare @llvm.masked.load.nxv2bf16(*, i32, , ) +declare @llvm.masked.load.nxv4bf16(*, i32, , ) declare @llvm.masked.load.nxv8bf16(*, i32, , ) declare void @llvm.masked.store.nxv2i64(, *, i32, ) @@ -305,6 +339,8 @@ declare void @llvm.masked.store.nxv4f32(, *, i32, ) declare void @llvm.masked.store.nxv4f16(, *, i32, ) declare void @llvm.masked.store.nxv8f16(, *, i32, ) +declare void @llvm.masked.store.nxv2bf16(, *, i32, ) +declare void @llvm.masked.store.nxv4bf16(, *, i32, ) declare void @llvm.masked.store.nxv8bf16(, *, i32, ) declare @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(*, i32 immarg, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll @@ -166,6 +166,18 @@ ret void } +define void @st1_nxv4bf16(bfloat* %addr, i64 %off, %val) { +; CHECK-LABEL: st1_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1h { z0.s }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %ptr = getelementptr inbounds bfloat, bfloat* %addr, i64 %off + %ptrcast = bitcast bfloat* %ptr to * + store %val, * %ptrcast + ret void +} + define void @st1_nxv2f16(half* %addr, i64 %off, %val) { ; CHECK-LABEL: st1_nxv2f16: ; CHECK: // %bb.0: @@ -178,6 +190,18 @@ ret void } +define void @st1_nxv2bf16(bfloat* %addr, i64 %off, %val) { +; CHECK-LABEL: st1_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1h { z0.d }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %ptr = getelementptr inbounds bfloat, bfloat* %addr, i64 %off + %ptrcast = bitcast bfloat* %ptr to * + store %val, * %ptrcast + ret void +} + ; ST1W define void @st1_nxv4i32(i32* %addr, i64 %off, %val) {