diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1831,6 +1831,24 @@
   case AArch64::ST1H_IMM:
   case AArch64::ST1W_IMM:
   case AArch64::ST1D_IMM:
+  case AArch64::LD1B_H_IMM:
+  case AArch64::LD1SB_H_IMM:
+  case AArch64::LD1H_S_IMM:
+  case AArch64::LD1SH_S_IMM:
+  case AArch64::LD1W_D_IMM:
+  case AArch64::LD1SW_D_IMM:
+  case AArch64::ST1B_H_IMM:
+  case AArch64::ST1H_S_IMM:
+  case AArch64::ST1W_D_IMM:
+  case AArch64::LD1B_S_IMM:
+  case AArch64::LD1SB_S_IMM:
+  case AArch64::LD1H_D_IMM:
+  case AArch64::LD1SH_D_IMM:
+  case AArch64::ST1B_S_IMM:
+  case AArch64::ST1H_D_IMM:
+  case AArch64::LD1B_D_IMM:
+  case AArch64::LD1SB_D_IMM:
+  case AArch64::ST1B_D_IMM:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
@@ -2289,6 +2307,45 @@
     MinOffset = -8;
     MaxOffset = 7;
     break;
+  case AArch64::LD1B_H_IMM:
+  case AArch64::LD1SB_H_IMM:
+  case AArch64::LD1H_S_IMM:
+  case AArch64::LD1SH_S_IMM:
+  case AArch64::LD1W_D_IMM:
+  case AArch64::LD1SW_D_IMM:
+  case AArch64::ST1B_H_IMM:
+  case AArch64::ST1H_S_IMM:
+  case AArch64::ST1W_D_IMM:
+    // A half vector worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(8);
+    Width = SVEMaxBytesPerVector / 2;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
+  case AArch64::LD1B_S_IMM:
+  case AArch64::LD1SB_S_IMM:
+  case AArch64::LD1H_D_IMM:
+  case AArch64::LD1SH_D_IMM:
+  case AArch64::ST1B_S_IMM:
+  case AArch64::ST1H_D_IMM:
+    // A quarter vector worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(4);
+    Width = SVEMaxBytesPerVector / 4;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
+  case AArch64::LD1B_D_IMM:
+  case AArch64::LD1SB_D_IMM:
+  case AArch64::ST1B_D_IMM:
+    // An eighth vector worth of data
+    // Width = mbytes * elements
+    Scale = TypeSize::Scalable(2);
+    Width = SVEMaxBytesPerVector / 8;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
   case AArch64::ST2GOffset:
case AArch64::STZ2GOffset: Scale = TypeSize::Fixed(16); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1560,37 +1560,69 @@ defm : pred_store; defm : pred_store; - multiclass unpred_store { - def _fi : Pat<(store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)), - (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; + multiclass unpred_store { + def : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)), + (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - def _default : Pat<(store (Ty ZPR:$val), GPR64:$base), - (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; + def : Pat<(Store (Ty ZPR:$val), GPR64:$base), + (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; } - defm Pat_ST1B : unpred_store; - defm Pat_ST1H : unpred_store; - defm Pat_ST1W : unpred_store; - defm Pat_ST1D : unpred_store; - defm Pat_ST1H_float16: unpred_store; - defm Pat_ST1W_float : unpred_store; - defm Pat_ST1D_double : unpred_store; - - multiclass unpred_load { - def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))), - (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; - - def _default : Pat<(Ty (load GPR64:$base)), - (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>; + defm : unpred_store< store, nxv16i8, ST1B_IMM, PTRUE_B>; + defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H_IMM, PTRUE_H>; + defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S_IMM, PTRUE_S>; + defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D_IMM, PTRUE_D>; + defm : unpred_store< store, nxv8i16, ST1H_IMM, PTRUE_H>; + defm : unpred_store; + defm : unpred_store; + defm : unpred_store< store, nxv4i32, ST1W_IMM, PTRUE_S>; + defm : unpred_store; + defm : unpred_store< store, nxv2i64, ST1D_IMM, PTRUE_D>; + defm : unpred_store< store, nxv8f16, ST1H_IMM, PTRUE_H>; + defm 
: unpred_store<          store, nxv4f16,  ST1H_S_IMM, PTRUE_S>;
+  defm : unpred_store<          store, nxv2f16,  ST1H_D_IMM, PTRUE_D>;
+  defm : unpred_store<          store, nxv4f32,    ST1W_IMM, PTRUE_S>;
+  defm : unpred_store<          store, nxv2f32,  ST1W_D_IMM, PTRUE_D>;
+  defm : unpred_store<          store, nxv2f64,    ST1D_IMM, PTRUE_D>;
+
+  multiclass unpred_load<SDPatternOperator Load, ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
+    def : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
+              (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+
+    def : Pat<(Ty (Load GPR64:$base)),
+              (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
   }
 
-  defm Pat_LD1B : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
-  defm Pat_LD1H : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
-  defm Pat_LD1W : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
-  defm Pat_LD1D : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
-  defm Pat_LD1H_float16: unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
-  defm Pat_LD1W_float : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
-  defm Pat_LD1D_double : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
+  defm : unpred_load<          load, nxv16i8,    LD1B_IMM, PTRUE_B>;
+  defm : unpred_load<   zextloadvi8, nxv8i16,  LD1B_H_IMM, PTRUE_H>;
+  defm : unpred_load<   zextloadvi8, nxv4i32,  LD1B_S_IMM, PTRUE_S>;
+  defm : unpred_load<   zextloadvi8, nxv2i64,  LD1B_D_IMM, PTRUE_D>;
+  defm : unpred_load<    extloadvi8, nxv8i16,  LD1B_H_IMM, PTRUE_H>;
+  defm : unpred_load<    extloadvi8, nxv4i32,  LD1B_S_IMM, PTRUE_S>;
+  defm : unpred_load<    extloadvi8, nxv2i64,  LD1B_D_IMM, PTRUE_D>;
+  defm : unpred_load<   sextloadvi8, nxv8i16, LD1SB_H_IMM, PTRUE_H>;
+  defm : unpred_load<   sextloadvi8, nxv4i32, LD1SB_S_IMM, PTRUE_S>;
+  defm : unpred_load<   sextloadvi8, nxv2i64, LD1SB_D_IMM, PTRUE_D>;
+  defm : unpred_load<          load, nxv8i16,    LD1H_IMM, PTRUE_H>;
+  defm : unpred_load<  zextloadvi16, nxv4i32,  LD1H_S_IMM, PTRUE_S>;
+  defm : unpred_load<  zextloadvi16, nxv2i64,  LD1H_D_IMM, PTRUE_D>;
+  defm : unpred_load<   extloadvi16, nxv4i32,  LD1H_S_IMM, PTRUE_S>;
+  defm : unpred_load<   extloadvi16, nxv2i64,  LD1H_D_IMM, PTRUE_D>;
+  defm : unpred_load<  sextloadvi16, nxv4i32, LD1SH_S_IMM, PTRUE_S>;
+  defm : unpred_load<  sextloadvi16, nxv2i64, LD1SH_D_IMM, PTRUE_D>;
+  defm : unpred_load<          load, nxv4i32,    LD1W_IMM, PTRUE_S>;
+  defm : unpred_load<  zextloadvi32, nxv2i64,  LD1W_D_IMM, PTRUE_D>;
+  defm : unpred_load<   extloadvi32, nxv2i64,  LD1W_D_IMM, PTRUE_D>;
+  defm : unpred_load<  sextloadvi32, nxv2i64, LD1SW_D_IMM, PTRUE_D>;
+  defm : unpred_load<          load, nxv2i64,    LD1D_IMM, PTRUE_D>;
+  defm : unpred_load<          load, nxv8f16,    LD1H_IMM, PTRUE_H>;
+  defm :
unpred_load< load, nxv4f16, LD1H_S_IMM, PTRUE_S>; + defm : unpred_load< load, nxv2f16, LD1H_D_IMM, PTRUE_D>; + defm : unpred_load< load, nxv4f32, LD1W_IMM, PTRUE_S>; + defm : unpred_load< load, nxv2f32, LD1W_D_IMM, PTRUE_D>; + defm : unpred_load< load, nxv2f64, LD1D_IMM, PTRUE_D>; multiclass unpred_store_predicate { def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)), diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.ll b/llvm/test/CodeGen/AArch64/spillfill-sve.ll --- a/llvm/test/CodeGen/AArch64/spillfill-sve.ll +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.ll @@ -16,6 +16,81 @@ ret void } +define void @fill_nxv8i8() { +; CHECK-LABEL: fill_nxv8i8 +; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp] +; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv8i8() { +; CHECK-LABEL: fill_signed_nxv8i8 +; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp] +; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + +define void @fill_nxv4i8() { +; CHECK-LABEL: fill_nxv4i8 +; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #3, mul vl] +; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv4i8() { +; CHECK-LABEL: fill_signed_nxv4i8 +; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #3, mul vl] +; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + +define void @fill_nxv2i8() { +; CHECK-LABEL: fill_nxv2i8 +; 
CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #7, mul vl] +; CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #6, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv2i8() { +; CHECK-LABEL: fill_signed_nxv2i8 +; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #7, mul vl] +; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #6, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + define void @fill_nxv8i16() { ; CHECK-LABEL: fill_nxv8i16 ; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp] @@ -27,6 +102,56 @@ ret void } +define void @fill_nxv4i16() { +; CHECK-LABEL: fill_nxv4i16 +; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp] +; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv4i16() { +; CHECK-LABEL: fill_signed_nxv4i16 +; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp] +; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + +define void @fill_nxv2i16() { +; CHECK-LABEL: fill_nxv2i16 +; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #3, mul vl] +; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv2i16() { +; CHECK-LABEL: fill_signed_nxv2i16 +; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #3, mul vl] +; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = 
sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + define void @fill_nxv4i32() { ; CHECK-LABEL: fill_nxv4i32 ; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp] @@ -38,6 +163,31 @@ ret void } +define void @fill_nxv2i32() { +; CHECK-LABEL: fill_nxv2i32 +; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp] +; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define @fill_signed_nxv2i32() { +; CHECK-LABEL: fill_signed_nxv2i32 +; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp] +; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + %a = load volatile , * %local0 + %a_ext = sext %a to + %b = load volatile , * %local1 + %b_ext = sext %b to + %sum = add %a_ext, %b_ext + ret %sum +} + define void @fill_nxv2i64() { ; CHECK-LABEL: fill_nxv2i64 ; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp] @@ -63,6 +213,39 @@ ret void } +define void @spill_nxv8i8( %v0, %v1) { +; CHECK-LABEL: spill_nxv8i8 +; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp] +; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + +define void @spill_nxv4i8( %v0, %v1) { +; CHECK-LABEL: spill_nxv4i8 +; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #3, mul vl] +; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + +define void @spill_nxv2i8( %v0, %v1) { +; CHECK-LABEL: spill_nxv2i8 +; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #7, mul vl] +; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #6, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + define void @spill_nxv8i16( %v0, %v1) { ; CHECK-LABEL: spill_nxv8i16 ; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp] @@ -74,6 +257,28 @@ ret 
void } +define void @spill_nxv4i16( %v0, %v1) { +; CHECK-LABEL: spill_nxv4i16 +; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp] +; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + +define void @spill_nxv2i16( %v0, %v1) { +; CHECK-LABEL: spill_nxv2i16 +; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #3, mul vl] +; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #2, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + define void @spill_nxv4i32( %v0, %v1) { ; CHECK-LABEL: spill_nxv4i32 ; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp] @@ -85,6 +290,17 @@ ret void } +define void @spill_nxv2i32( %v0, %v1) { +; CHECK-LABEL: spill_nxv2i32 +; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp] +; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp, #1, mul vl] + %local0 = alloca + %local1 = alloca + store volatile %v0, * %local0 + store volatile %v1, * %local1 + ret void +} + define void @spill_nxv2i64( %v0, %v1) { ; CHECK-LABEL: spill_nxv2i64 ; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]