Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -166,6 +166,9 @@
                              int FrameIndex, const TargetRegisterClass *RC,
                              const TargetRegisterInfo *TRI) const override;
 
+  bool isSubregFoldable(MachineOperand &MO,
+                        MachineInstr *LoadMI) const override;
+
   using TargetInstrInfo::foldMemoryOperandImpl;
   MachineInstr *
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2566,6 +2566,11 @@
       .setMIFlag(Flag);
 }
 
+bool AArch64InstrInfo::isSubregFoldable(MachineOperand &MO,
+                                        MachineInstr *LoadMI) const {
+  return true;
+}
+
 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, int FrameIndex,
@@ -2583,7 +2588,7 @@
   //
   //
   //
-  if (MI.isCopy()) {
+  if (MI.isFullCopy()) {
     unsigned DstReg = MI.getOperand(0).getReg();
     unsigned SrcReg = MI.getOperand(1).getReg();
     if (SrcReg == AArch64::SP &&
@@ -2608,18 +2613,42 @@
   //
   //   STRXui %XZR, <fi#0>
   //
-  if (MI.isFullCopy() && Ops.size() == 1 && Ops[0] == 0) {
-    MachineBasicBlock *MBB = MI.getParent();
-    const MachineOperand &SrcMO = MI.getOperand(1);
-    unsigned SrcReg = SrcMO.getReg();
-    if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) {
-      const TargetRegisterInfo *TRI = &getRegisterInfo();
-      const TargetRegisterClass *RC = SrcReg == AArch64::WZR
-                                          ? &AArch64::GPR32RegClass
-                                          : &AArch64::GPR64RegClass;
-      storeRegToStackSlot(*MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
-                          RC, TRI);
-      return &*--InsertPt;
+  // Also handle cases like:
+  //
+  //   %vreg0:sub_32 = COPY %WZR; GPR64common:%vreg0
+  //
+  // by generating:
+  //
+  //   STRXui %XZR
+  //
+  if (MI.isCopy() && Ops.size() == 1 && Ops[0] == 0) {
+    const MachineOperand &DstMO = MI.getOperand(0);
+    assert(TargetRegisterInfo::isVirtualRegister(DstMO.getReg()) &&
+           "expected virtual register as destination of COPY");
+    // Only handle full writes to the destination register, or subreg writes
+    // that are undef for the non-written parts.
+    if (DstMO.getSubReg() == 0 || DstMO.isUndef()) {
+      const MachineOperand &SrcMO = MI.getOperand(1);
+      unsigned SrcReg = SrcMO.getReg();
+      if (SrcReg == AArch64::WZR || SrcReg == AArch64::XZR) {
+        MachineBasicBlock &MBB = *MI.getParent();
+        MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+        const TargetRegisterInfo *TRI = &getRegisterInfo();
+        const TargetRegisterClass *DstRC = MRI.getRegClass(DstMO.getReg());
+        const TargetRegisterClass *SpillRC;
+        unsigned SpillReg;
+        if (DstRC->getSize() == 4) {
+          SpillRC = &AArch64::GPR32RegClass;
+          SpillReg = AArch64::WZR;
+        } else {
+          assert(DstRC->getSize() == 8);
+          SpillRC = &AArch64::GPR64RegClass;
+          SpillReg = AArch64::XZR;
+        }
+        storeRegToStackSlot(MBB, InsertPt, SpillReg, SrcMO.isKill(),
+                            FrameIndex, SpillRC, TRI);
+        return &*--InsertPt;
+      }
     }
   }
 
Index: test/CodeGen/MIR/AArch64/spill-fold-zr.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/AArch64/spill-fold-zr.mir
@@ -0,0 +1,18 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+  define i64 @test_zr_spill_fold(i1 %c) { ret i64 0 }
+...
+--- +# CHECK-LABEL: name: test_zr_spill_fold +name: test_zr_spill_fold +registers: + - { id: 0, class: gpr64 } +body: | + bb.0: + ; CHECK: STRXui %xzr + undef %0.sub_32 = COPY %wzr + INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr, 12, implicit-def early-clobber %sp + %x0 = COPY %0 + RET_ReallyLR implicit %x0 +...