Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -80,6 +80,9 @@
   bool expandSetTagLoop(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         MachineBasicBlock::iterator &NextMBBI);
+  bool expandSVESpillFill(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI, unsigned Opc,
+                          unsigned N);
 };
 
 } // end anonymous namespace
@@ -590,6 +593,28 @@
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator MBBI,
+                                             unsigned Opc, unsigned N) {
+  const TargetRegisterInfo *TRI =
+      MBB.getParent()->getSubtarget().getRegisterInfo();
+  MachineInstr &MI = *MBBI;
+  for (unsigned Offset = 0; Offset < N; ++Offset) {
+    int ImmOffset = MI.getOperand(2).getImm() + Offset;
+    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
+    assert(ImmOffset >= -256 && ImmOffset < 256 &&
+           "Immediate spill offset out of range");
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+        .addReg(
+            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
+            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+        .addImm(ImmOffset);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -965,6 +990,18 @@
       report_fatal_error(
           "Non-writeback variants of STGloop / STZGloop should not "
           "survive past PrologEpilogInserter.");
+  case AArch64::STR_ZZZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
+  case AArch64::STR_ZZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
+  case AArch64::STR_ZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+  case AArch64::LDR_ZZZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
+  case AArch64::LDR_ZZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
+  case AArch64::LDR_ZZXI:
+    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
   }
   return false;
 }
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2262,6 +2262,27 @@
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::STR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 4;
+    MinOffset = -256;
+    MaxOffset = 252;
+    break;
+  case AArch64::STR_ZZZXI:
+  case AArch64::LDR_ZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 3;
+    MinOffset = -256;
+    MaxOffset = 253;
+    break;
+  case AArch64::STR_ZZXI:
+  case AArch64::LDR_ZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 2;
+    MinOffset = -256;
+    MaxOffset = 254;
+    break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
     Scale = TypeSize::Scalable(2);
@@ -3020,6 +3041,18 @@
       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
       Opc = AArch64::STR_ZXI;
       StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZXI;
+      StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     assert(Opc && "Unknown register class");
     MFI.setStackID(FI, StackID);
@@ -3163,6 +3196,18 @@
       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
       Opc = AArch64::LDR_ZXI;
       StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZXI;
+      StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     assert(Opc && "Unknown register class");
     MFI.setStackID(FI, StackID);
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1245,6 +1245,20 @@
   def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
                   (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
 
+  // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
+  // These get expanded to individual LDR_ZXI/STR_ZXI instructions in
+  // AArch64ExpandPseudoInsts.
+  let mayLoad = 1, hasSideEffects = 0 in {
+    def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+  }
+  let mayStore = 1, hasSideEffects = 0 in {
+    def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+  }
+
   def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
             (PTEST_PP PPR:$pg, PPR:$src)>;
   def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
Index: llvm/test/CodeGen/AArch64/spillfill-sve.mir
===================================================================
--- llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -8,6 +8,9 @@
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
 
   attributes #0 = { nounwind "target-features"="+sve" }
 
@@ -90,3 +93,120 @@
     $z0 = COPY %0
     RET_ReallyLR
 ...
+---
+name: spills_fills_stack_id_zpr2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr2 }
+stack:
+liveins:
+  - { reg: '$z0_z1', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr2
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+
+    %0:zpr2 = COPY $z0_z1
+
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr3
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr3 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1_z2
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr3
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+
+    %0:zpr3 = COPY $z0_z1_z2
+
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr4 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1_z2_z3
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr4
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: STR_ZXI $z3, $sp, 3
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+    ; EXPAND: $z3 = LDR_ZXI $sp, 3
+
+    %0:zpr4 = COPY $z0_z1_z2_z3
+
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2_z3 = COPY %0
+    RET_ReallyLR
+...