diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -80,6 +80,9 @@
   bool expandSetTagLoop(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         MachineBasicBlock::iterator &NextMBBI);
+  bool expandSVESpillFill(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI, unsigned Opc,
+                          unsigned N);
 };
 
 } // end anonymous namespace
@@ -595,6 +598,48 @@
   return true;
 }
 
+/// Expand a multi-vector SVE spill/fill pseudo into \p N single-vector
+/// instructions of opcode \p Opc, one per consecutive sub-register of the
+/// tuple in operand 0, starting at zsub0.
+///
+/// Operand 1 of the pseudo is the base register and operand 2 the immediate
+/// offset; sub-vector I is accessed at offset (immediate + I). The pseudo is
+/// erased once its replacements have been inserted in front of it.
+///
+/// \param Opc AArch64::LDR_ZXI for a fill, AArch64::STR_ZXI for a spill.
+/// \param N Number of vectors in the register tuple (2, 3 or 4).
+/// \returns true; the pseudo is always expanded.
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator MBBI,
+                                             unsigned Opc, unsigned N) {
+  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) &&
+         "Unexpected opcode");
+  assert(N >= 2 && N <= 4 && "Unexpected number of vectors in tuple");
+  const TargetRegisterInfo *TRI =
+      MBB.getParent()->getSubtarget().getRegisterInfo();
+  MachineInstr &MI = *MBBI;
+  const int64_t BaseImm = MI.getOperand(2).getImm();
+  const bool BaseIsKill = MI.getOperand(1).isKill();
+  // A fill defines each sub-register; a spill only reads it.
+  const unsigned RState = Opc == AArch64::LDR_ZXI ? RegState::Define : 0;
+  for (unsigned Offset = 0; Offset < N; ++Offset) {
+    const int64_t ImmOffset = BaseImm + Offset;
+    assert(ImmOffset >= -256 && ImmOffset < 256 &&
+           "Immediate spill offset out of range");
+    // The base register is still needed by later accesses, so only the last
+    // one may carry the pseudo's kill flag.
+    const bool Kill = BaseIsKill && Offset + 1 == N;
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(),
+                               AArch64::zsub0 + Offset),
+                RState)
+        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+        .addImm(ImmOffset);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -970,6 +995,18 @@ report_fatal_error( "Non-writeback variants of STGloop / STZGloop should not " "survive past PrologEpilogInserter."); + case AArch64::STR_ZZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); + case AArch64::STR_ZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); + case AArch64::STR_ZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); + case AArch64::LDR_ZZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); + case AArch64::LDR_ZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); + case AArch64::LDR_ZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); } return false; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2278,6 +2278,27 @@ MinOffset = -256; MaxOffset = 255; break; + case AArch64::STR_ZZZZXI: + case AArch64::LDR_ZZZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 4; + MinOffset = -256; + MaxOffset = 252; + break; + case AArch64::STR_ZZZXI: + case AArch64::LDR_ZZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 3; + MinOffset = -256; + MaxOffset = 253; + break; + case AArch64::STR_ZZXI: + case AArch64::LDR_ZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 2; + MinOffset = -256; + MaxOffset = 254; + break; case AArch64::LDR_PXI: case AArch64::STR_PXI: Scale = TypeSize::Scalable(2); @@ -2984,6 +3005,7 @@ MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); unsigned Opc = 0; bool Offset = true; + unsigned StackID = TargetStackID::Default; switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) @@ -2992,6 +3014,11 @@ case 2: if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::STRHui; + else if (AArch64::PPRRegClass.hasSubClassEq(RC)) 
{ + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_PXI; + StackID = TargetStackID::SVEVector; + } break; case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { @@ -3031,6 +3058,10 @@ get(AArch64::STPXi), SrcReg, isKill, AArch64::sube64, AArch64::subo64, FI, MMO); return; + } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZXI; + StackID = TargetStackID::SVEVector; } break; case 24: @@ -3049,6 +3080,10 @@ assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZXI; + StackID = TargetStackID::SVEVector; } break; case 48: @@ -3056,6 +3091,10 @@ assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d; Offset = false; + } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZZXI; + StackID = TargetStackID::SVEVector; } break; case 64: @@ -3063,19 +3102,13 @@ assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZZZXI; + StackID = TargetStackID::SVEVector; } break; } - unsigned StackID = TargetStackID::Default; - if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); - Opc = AArch64::STR_PXI; - StackID = TargetStackID::SVEVector; - } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); - Opc = AArch64::STR_ZXI; - StackID = 
TargetStackID::SVEVector; - } assert(Opc && "Unknown register class"); MFI.setStackID(FI, StackID); @@ -3126,6 +3159,7 @@ unsigned Opc = 0; bool Offset = true; + unsigned StackID = TargetStackID::Default; switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) @@ -3134,6 +3168,11 @@ case 2: if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRHui; + else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_PXI; + StackID = TargetStackID::SVEVector; + } break; case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { @@ -3173,6 +3212,10 @@ get(AArch64::LDPXi), DestReg, AArch64::sube64, AArch64::subo64, FI, MMO); return; + } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZXI; + StackID = TargetStackID::SVEVector; } break; case 24: @@ -3191,6 +3234,10 @@ assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d; Offset = false; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZXI; + StackID = TargetStackID::SVEVector; } break; case 48: @@ -3198,6 +3245,10 @@ assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d; Offset = false; + } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZZXI; + StackID = TargetStackID::SVEVector; } break; case 64: @@ -3205,20 +3256,14 @@ assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZZZXI; + StackID = TargetStackID::SVEVector; } 
break; } - unsigned StackID = TargetStackID::Default; - if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); - Opc = AArch64::LDR_PXI; - StackID = TargetStackID::SVEVector; - } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); - Opc = AArch64::LDR_ZXI; - StackID = TargetStackID::SVEVector; - } assert(Opc && "Unknown register class"); MFI.setStackID(FI, StackID); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1334,6 +1334,20 @@ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4. + // These get expanded to individual LDR_ZXI/STR_ZXI instructions in + // AArch64ExpandPseudoInsts. 
+ let mayLoad = 1, hasSideEffects = 0 in { + def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + } + let mayStore = 1, hasSideEffects = 0 in { + def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + } + def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)), (PTEST_PP PPR:$pg, PPR:$src)>; def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)), diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -8,6 +8,9 @@ define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable } attributes #0 = { nounwind "target-features"="+sve" } @@ -90,3 +93,120 @@ $z0 = COPY %0 RET_ReallyLR ... 
+---
+name: spills_fills_stack_id_zpr2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr2 }
+stack:
+liveins:
+  - { reg: '$z0_z1', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1
+    ; Expect a 32-byte sve-vec spill slot for the zpr2 value and one STR_ZXI/LDR_ZXI per vector.
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr2
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+
+    %0:zpr2 = COPY $z0_z1
+    ; Redefine all of z0-z31 while %0 is live, leaving no Z register free to hold it.
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr3
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr3 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1_z2
+    ; Expect a 48-byte sve-vec spill slot for the zpr3 value and one STR_ZXI/LDR_ZXI per vector.
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr3
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+
+    %0:zpr3 = COPY $z0_z1_z2
+    ; Redefine all of z0-z31 while %0 is live, leaving no Z register free to hold it.
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr4 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $z0_z1_z2_z3
+    ; Expect a 64-byte sve-vec spill slot for the zpr4 value and one STR_ZXI/LDR_ZXI per vector.
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr4
+    ; CHECK: stack:
+    ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
+    ; CHECK-NEXT: stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: STR_ZXI $z3, $sp, 3
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+    ; EXPAND: $z3 = LDR_ZXI $sp, 3
+
+    %0:zpr4 = COPY $z0_z1_z2_z3
+    ; Redefine all of z0-z31 while %0 is live, leaving no Z register free to hold it.
+    $z0_z1_z2_z3 = IMPLICIT_DEF
+    $z4_z5_z6_z7 = IMPLICIT_DEF
+    $z8_z9_z10_z11 = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2_z3 = COPY %0
+    RET_ReallyLR
+...