Select LD1B/ST1B (reg+reg) for unpredicated nxv8i16 loads and stores

Adds two selection patterns, both guarded by Predicates = [IsLE], that map an
nxv8i16 load/store addressed through am_sve_regreg_lsl0 onto LD1B/ST1B with a
(PTRUE_B 31) governing predicate, plus one new load test and one new store
test. The updated CHECK lines in named-vector-shuffles-sve.ll show the effect
on existing output: "sub x8, x8, #imm" followed by "ld1h ... [x8]" becomes
"mov x9, #-imm" followed by "ld1b ... [x8, x9]", with an additional
"ptrue p1.b".

NOTE(review): presumably the IsLE guard exists because a byte-wise access is
only interchangeable with a halfword-wise one on little-endian targets --
confirm.

NOTE(review): every "<...>" span starting with a letter (the <vscale x N x T>
IR types and the two multiclass parameter lists) was missing from the copy
this patch was rebuilt from. The ones on added lines follow from the nxv8i16
patterns and the bitcast_to_i16 / bitcast_from_i16 test names. The ones on
context lines were reconstructed from memory of the LLVM tree and must be
confirmed against the tree before applying.

NOTE(review): alignment padding inside lines was also lost (single spaces are
kept as received), so context lines may differ from the tree in whitespace
only; apply with whitespace tolerance, e.g. "git apply --ignore-whitespace".

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1917,6 +1917,11 @@
   defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
   defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
 
+  let Predicates = [IsLE] in {
+    def : Pat<(store (nxv8i16 ZPR:$val), (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
+              (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
+  }
+
   multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegRegInst,
                          Instruction RegImmInst, Instruction PTrue,
                          ComplexPattern AddrCP> {
@@ -1967,6 +1972,11 @@
   defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
   defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
 
+  let Predicates = [IsLE] in {
+    def : Pat<(nxv8i16 (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
+              (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
+  }
+
   multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
     def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
                   (Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -729,11 +729,12 @@
 ; CHECK-NEXT: addvl sp, sp, #-2
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #16 // =16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-16
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -748,11 +749,12 @@
 ; CHECK-NEXT: addvl sp, sp, #-2
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #2 // =2
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -1152,11 +1154,12 @@
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
 ; CHECK-NEXT: addvl x8, x8, #1
-; CHECK-NEXT: sub x8, x8, #2 // =2
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-NEXT: mov x9, #-2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
 ; CHECK-NEXT: and z0.h, z0.h, #0x1
 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT: addvl sp, sp, #2
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
--- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
@@ -15,6 +15,18 @@
   ret <vscale x 16 x i8> %val
 }
 
+define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(i8* %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ret
+  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
+  %val = load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast
+  ret <vscale x 8 x i16> %val
+}
+
 define <vscale x 8 x i16> @ld1_nxv8i16_zext8(i8* %addr, i64 %off) {
 ; CHECK-LABEL: ld1_nxv8i16_zext8:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
--- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
@@ -15,6 +15,18 @@
   ret void
 }
 
+define void @st1_nxv16i8_bitcast_from_i16(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
+; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-NEXT: ret
+  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
+  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
+  store <vscale x 8 x i16> %val, <vscale x 8 x i16>* %ptrcast
+  ret void
+}
+
 define void @st1_nxv8i16_trunc8(i8* %addr, i64 %off, <vscale x 8 x i16> %val) {
 ; CHECK-LABEL: st1_nxv8i16_trunc8:
 ; CHECK: // %bb.0: