Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -104,6 +104,10 @@
                                           "Reserve X"#i#", making it unavailable "
                                           "as a GPR">;
 
+foreach i = {8-15,18} in
+  def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
+    "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
+
 def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
                                     "Use alias analysis during codegen">;
 
Index: lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64CallLowering.cpp
+++ lib/Target/AArch64/AArch64CallLowering.cpp
@@ -337,6 +337,14 @@
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
   }
 
+  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+    auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+    const MCPhysReg *CSRs = TRI->getCalleeSavedRegs(&MF);
+    SmallVector<MCPhysReg, 32> UpdatedCSRs;
+    TRI->getCustomCalleeSavedRegs(MF, CSRs, UpdatedCSRs);
+    MRI.setCalleeSavedRegs(UpdatedCSRs);
+  }
+
   // Move back to the end of the basic block.
   MIRBuilder.setMBB(MBB);
 
@@ -378,7 +386,13 @@
 
   // Tell the call which registers are clobbered.
   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
-  MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
+  const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+    uint32_t *UpdatedMask = MF.allocateRegMask();
+    TRI->getCustomCallPreservedMask(MF, Mask, UpdatedMask);
+    Mask = UpdatedMask;
+  }
+  MIB.addRegMask(Mask);
 
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -2918,6 +2918,9 @@
   if (CC != CallingConv::C && CC != CallingConv::Swift)
     return false;
 
+  if (Subtarget->hasCustomCallingConv())
+    return false;
+
   // Only handle simple cases of up to 8 GPR and FPR each.
   unsigned GPRCnt = 0;
   unsigned FPRCnt = 0;
Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -356,7 +356,7 @@
   LiveRegs.addLiveIns(*MBB);
 
   // Mark callee saved registers as used so we will not choose them.
-  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
   for (unsigned i = 0; CSRegs[i]; ++i)
     LiveRegs.addReg(CSRegs[i]);
 
@@ -1541,7 +1541,7 @@
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
 
   unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                 ? RegInfo->getBaseRegister()
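A note on the two AArch64FrameLowering.cpp hunks: both call sites now read the
callee-saved list from MachineRegisterInfo instead of asking TargetRegisterInfo
directly, so they observe the per-function list installed by the
lowerFormalArguments/LowerFormalArguments changes. A minimal sketch of that
round trip, using only APIs this patch already relies on (the helper name is
hypothetical):

    // Sketch, not part of the patch: install the widened CSR list once per
    // function, then read it back the way frame lowering now does.
    static unsigned installAndCountCSRs(MachineFunction &MF,
                                        const AArch64RegisterInfo &TRI) {
      SmallVector<MCPhysReg, 32> UpdatedCSRs;
      TRI.getCustomCalleeSavedRegs(MF, TRI.getCalleeSavedRegs(&MF), UpdatedCSRs);
      MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs); // MRI keeps its own copy

      // Later queries return the updated, zero-terminated list:
      const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
      unsigned NumCSRs = 0;
      for (unsigned i = 0; CSRegs[i]; ++i)
        ++NumCSRs; // any X8..X15/X18 with its feature set is counted here
      return NumCSRs;
    }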
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3106,6 +3106,14 @@
   // much is there while considering tail calls (because we can reuse it).
   FuncInfo->setBytesInStackArgArea(StackArgSize);
 
+  if (Subtarget->hasCustomCallingConv()) {
+    auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+    const MCPhysReg *CSRs = TRI->getCalleeSavedRegs(&MF);
+    SmallVector<MCPhysReg, 32> UpdatedCSRs;
+    TRI->getCustomCalleeSavedRegs(MF, CSRs, UpdatedCSRs);
+    MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
+  }
+
   return Chain;
 }
 
@@ -3336,6 +3344,14 @@
   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
   if (!CCMatch) {
     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+    if (Subtarget->hasCustomCallingConv()) {
+      uint32_t *UpdatedCallerMask = MF.allocateRegMask();
+      uint32_t *UpdatedCalleeMask = MF.allocateRegMask();
+      TRI->getCustomCallPreservedMask(MF, CallerPreserved, UpdatedCallerMask);
+      TRI->getCustomCallPreservedMask(MF, CalleePreserved, UpdatedCalleeMask);
+      CallerPreserved = UpdatedCallerMask;
+      CalleePreserved = UpdatedCalleeMask;
+    }
     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
       return false;
   }
@@ -3729,6 +3745,12 @@
   } else
     Mask = TRI->getCallPreservedMask(MF, CallConv);
 
+  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+    uint32_t *UpdatedMask = MF.allocateRegMask();
+    TRI->getCustomCallPreservedMask(MF, Mask, UpdatedMask);
+    Mask = UpdatedMask;
+  }
+
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);
 
@@ -3784,12 +3806,12 @@
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SDLoc &DL, SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
   CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
                           ? RetCC_AArch64_WebKit_JS
                           : RetCC_AArch64_AAPCS;
   SmallVector<CCValAssign, 4> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
-                 *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
   CCInfo.AnalyzeReturn(Outs, RetCC);
 
   // Copy the result values into the output registers.
@@ -3823,9 +3845,13 @@
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
-  const MCPhysReg *I =
-      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
   if (I) {
+    SmallVector<MCPhysReg, 32> UpdatedCSRs;
+    if (Subtarget->hasCustomCallingConv()) {
+      TRI->getCustomCalleeSavedRegs(MF, I, UpdatedCSRs);
+      I = UpdatedCSRs.begin();
+    }
     for (; *I; ++I) {
       if (AArch64::GPR64RegClass.contains(*I))
         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
@@ -4023,6 +4049,13 @@
   // silly).
   const uint32_t *Mask =
       Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
+  MachineFunction &MF = DAG.getMachineFunction();
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+    uint32_t *UpdatedMask = MF.allocateRegMask();
+    TRI->getCustomCallPreservedMask(MF, Mask, UpdatedMask);
+    Mask = UpdatedMask;
+  }
 
   // Finally, we can make the call. This is just a degenerate version of a
   // normal AArch64 call node: x0 takes the address of the descriptor, and
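The allocate-copy-update sequence added above for the TLS descriptor call is
the same four lines used in AArch64CallLowering::lowerCall,
isEligibleForTailCallOptimization, and LowerCall, and it appears once more
below for the Windows stack probe. If more call sites accumulate, a small
helper could keep them in sync; a possible shape (hypothetical, not part of
this patch):

    static const uint32_t *updateMaskIfCustom(MachineFunction &MF,
                                              const AArch64RegisterInfo *TRI,
                                              const uint32_t *Mask) {
      if (!MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
        return Mask;
      // allocateRegMask() returns storage owned by the MachineFunction, so
      // the updated mask remains valid for the function's whole lifetime.
      uint32_t *UpdatedMask = MF.allocateRegMask();
      TRI->getCustomCallPreservedMask(MF, Mask, UpdatedMask);
      return UpdatedMask;
    }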
@@ -7747,6 +7780,13 @@
   const uint32_t *Mask =
       Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
+  MachineFunction &MF = DAG.getMachineFunction();
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
+    uint32_t *UpdatedMask = MF.allocateRegMask();
+    TRI->getCustomCallPreservedMask(MF, Mask, UpdatedMask);
+    Mask = UpdatedMask;
+  }
 
   Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
                      DAG.getConstant(4, dl, MVT::i64));
@@ -11533,10 +11573,16 @@
 void AArch64TargetLowering::insertCopiesSplitCSR(
     MachineBasicBlock *Entry,
     const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  MachineFunction &MF = *Entry->getParent();
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
   if (!IStart)
     return;
 
+  SmallVector<MCPhysReg, 32> UpdatedCSRs;
+  if (Subtarget->hasCustomCallingConv()) {
+    TRI->getCustomCalleeSavedRegs(MF, IStart, UpdatedCSRs);
+    IStart = UpdatedCSRs.begin();
+  }
+
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
Index: lib/Target/AArch64/AArch64RegisterInfo.h
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.h
+++ lib/Target/AArch64/AArch64RegisterInfo.h
@@ -34,6 +34,13 @@
   bool isAnyArgRegReserved(const MachineFunction &MF) const;
   void emitReservedArgRegCallError(const MachineFunction &MF) const;
 
+  void getCustomCalleeSavedRegs(const MachineFunction &MF,
+                                const MCPhysReg *CSRs,
+                                SmallVectorImpl<MCPhysReg> &UpdatedCSRs) const;
+  void getCustomCallPreservedMask(const MachineFunction &MF,
+                                  const uint32_t *Mask,
+                                  uint32_t *UpdatedMask) const;
+
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   const MCPhysReg *
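For readers of the two declarations above: a call-preserved mask is a bit
vector with one bit per physical register, and a set bit means the register's
contents survive the call (see the comment on
TargetRegisterInfo::getCallPreservedMask). The indexing convention the
implementation below relies on, as a sketch:

    // Bit R lives in word R / 32 at position R % 32; set means "preserved".
    static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
      return Mask[Reg / 32] & (1u << (Reg % 32));
    }
    // A mask holds MachineOperand::getRegMaskSize(NumRegs) words, i.e.
    // (NumRegs + 31) / 32 uint32_t entries -- which is what the memcpy in
    // getCustomCallPreservedMask copies before OR-ing in the new bits.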
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -75,6 +75,23 @@
   return nullptr;
 }
 
+void AArch64RegisterInfo::getCustomCalleeSavedRegs(
+    const MachineFunction &MF,
+    const MCPhysReg *CSRs,
+    SmallVectorImpl<MCPhysReg> &UpdatedCSRs) const {
+  for (const MCPhysReg *I = CSRs; *I; ++I)
+    UpdatedCSRs.push_back(*I);
+
+  for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+    if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+      UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
+    }
+  }
+  // Register lists are zero-terminated.
+  UpdatedCSRs.push_back(0);
+  return;
+}
+
 const TargetRegisterClass *
 AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                            unsigned Idx) const {
@@ -122,6 +139,27 @@
   return CSR_AArch64_TLS_ELF_RegMask;
 }
 
+void AArch64RegisterInfo::getCustomCallPreservedMask(
+    const MachineFunction &MF,
+    const uint32_t *Mask,
+    uint32_t *UpdatedMask) const {
+  unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
+  memcpy(UpdatedMask, Mask, sizeof(Mask[0]) * RegMaskSize);
+
+  for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+    if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+      for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
+                                   this, true);
+           SubReg.isValid(); ++SubReg) {
+        // See TargetRegisterInfo::getCallPreservedMask for how to interpret
+        // the register mask.
+        UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
+      }
+    }
+  }
+  return;
+}
+
 const uint32_t *
 AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
                                                 CallingConv::ID CC) const {
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -141,6 +141,9 @@
   // ReserveXRegister[i] - X#i is not available as a general purpose register.
   BitVector ReserveXRegister;
 
+  // CustomCallSavedXRegs[i] - X#i is callee saved.
+  BitVector CustomCallSavedXRegs;
+
   bool IsLittle;
 
   /// TargetTriple - What processor and OS we're targeting.
@@ -228,6 +231,10 @@
   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
   unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
+  bool isXRegCustomCalleeSaved(size_t i) const {
+    return CustomCallSavedXRegs[i];
+  }
+  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
   bool hasFPARMv8() const { return HasFPARMv8; }
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -153,6 +153,7 @@
     const TargetMachine &TM, bool LittleEndian)
     : AArch64GenSubtargetInfo(TT, CPU, FS),
       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
+      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
       TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
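The subtarget plumbing above is deliberately thin: the TableGen-generated
feature parser flips one CustomCallSavedXRegs bit per +call-saved-x# feature,
and hasCustomCallingConv() reduces to BitVector::any(). A simplified,
standalone sketch of the effect (values hypothetical):

    #include "llvm/ADT/BitVector.h"

    static bool sketchHasCustomCallingConv() {
      // One bit per register in GPR64common (X0..X30), as in AArch64Subtarget.
      llvm::BitVector CustomCallSavedXRegs(31);
      CustomCallSavedXRegs.set(8);       // effect of -mattr=+call-saved-x8
      CustomCallSavedXRegs.set(18);      // effect of -mattr=+call-saved-x18
      return CustomCallSavedXRegs.any(); // what hasCustomCallingConv() checks
    }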
Index: test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x8 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X8
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x9 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X9
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x10 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X10
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x11 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X11
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x12 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X12
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x13 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X13
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x14 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X14
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x15 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X15
+
+; RUN: llc -mtriple=arm64-linux-gnu -mattr=+call-saved-x18 -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-SAVED-X18
+
+; Test all call-saved-x# options together.
+; RUN: llc -mtriple=arm64-linux-gnu \
+; RUN:   -mattr=+call-saved-x8 \
+; RUN:   -mattr=+call-saved-x9 \
+; RUN:   -mattr=+call-saved-x10 \
+; RUN:   -mattr=+call-saved-x11 \
+; RUN:   -mattr=+call-saved-x12 \
+; RUN:   -mattr=+call-saved-x13 \
+; RUN:   -mattr=+call-saved-x14 \
+; RUN:   -mattr=+call-saved-x15 \
+; RUN:   -mattr=+call-saved-x18 \
+; RUN:   -o - %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-SAVED-ALL
+
+; Test GlobalISel.
+; RUN: llc -mtriple=arm64-linux-gnu \
+; RUN:   -mattr=+call-saved-x8 \
+; RUN:   -mattr=+call-saved-x9 \
+; RUN:   -mattr=+call-saved-x10 \
+; RUN:   -mattr=+call-saved-x11 \
+; RUN:   -mattr=+call-saved-x12 \
+; RUN:   -mattr=+call-saved-x13 \
+; RUN:   -mattr=+call-saved-x14 \
+; RUN:   -mattr=+call-saved-x15 \
+; RUN:   -mattr=+call-saved-x18 \
+; RUN:   -global-isel \
+; RUN:   -o - %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-SAVED-ALL
+
+; Used to exhaust the supply of GPRs.
+@var = global [30 x i64] zeroinitializer
+
+; Check that callee preserves additional CSRs.
+define void @callee() {
+; CHECK-LABEL: callee
+
+; CHECK-SAVED-X8: str x8, [sp
+; CHECK-SAVED-X9: str x9, [sp
+; CHECK-SAVED-X10: str x10, [sp
+; CHECK-SAVED-X11: str x11, [sp
+; CHECK-SAVED-X12: str x12, [sp
+; CHECK-SAVED-X13: str x13, [sp
+; CHECK-SAVED-X14: str x14, [sp
+; CHECK-SAVED-X15: str x15, [sp
+; CHECK-SAVED-X18: str x18, [sp
+
+; CHECK-SAVED-ALL: str x18, [sp
+; CHECK-SAVED-ALL-NEXT: stp x15, x14, [sp
+; CHECK-SAVED-ALL-NEXT: stp x13, x12, [sp
+; CHECK-SAVED-ALL-NEXT: stp x11, x10, [sp
+; CHECK-SAVED-ALL-NEXT: stp x9, x8, [sp
+
+  %val = load volatile [30 x i64], [30 x i64]* @var
+  store volatile [30 x i64] %val, [30 x i64]* @var
+
+; CHECK-SAVED-ALL: ldp x9, x8, [sp
+; CHECK-SAVED-ALL-NEXT: ldp x11, x10, [sp
+; CHECK-SAVED-ALL-NEXT: ldp x13, x12, [sp
+; CHECK-SAVED-ALL-NEXT: ldp x15, x14, [sp
+; CHECK-SAVED-ALL-NEXT: ldr x18, [sp
+
+; CHECK-SAVED-X8: ldr x8, [sp
+; CHECK-SAVED-X9: ldr x9, [sp
+; CHECK-SAVED-X10: ldr x10, [sp
+; CHECK-SAVED-X11: ldr x11, [sp
+; CHECK-SAVED-X12: ldr x12, [sp
+; CHECK-SAVED-X13: ldr x13, [sp
+; CHECK-SAVED-X14: ldr x14, [sp
+; CHECK-SAVED-X15: ldr x15, [sp
+; CHECK-SAVED-X18: ldr x18, [sp
+
+  ret void
+}
+
+; Check that caller doesn't shy away from allocating additional CSRs.
+define void @caller() {
+; CHECK-LABEL: caller
+
+  %val = load volatile [30 x i64], [30 x i64]* @var
+; CHECK-SAVED-X8: adrp x8, var
+; CHECK-SAVED-X9: adrp x9, var
+; CHECK-SAVED-X10: adrp x10, var
+; CHECK-SAVED-X11: adrp x11, var
+; CHECK-SAVED-X12: adrp x12, var
+; CHECK-SAVED-X13: adrp x13, var
+; CHECK-SAVED-X14: adrp x14, var
+; CHECK-SAVED-X15: adrp x15, var
+; CHECK-SAVED-X18: adrp x18, var
+
+; CHECK-SAVED-ALL: adrp x8, var
+; CHECK-SAVED-ALL-DAG: ldr x9
+; CHECK-SAVED-ALL-DAG: ldr x10
+; CHECK-SAVED-ALL-DAG: ldr x11
+; CHECK-SAVED-ALL-DAG: ldr x12
+; CHECK-SAVED-ALL-DAG: ldr x13
+; CHECK-SAVED-ALL-DAG: ldr x14
+; CHECK-SAVED-ALL-DAG: ldr x15
+; CHECK-SAVED-ALL-DAG: ldr x18
+
+  call void @callee()
+; CHECK: bl callee
+
+  store volatile [30 x i64] %val, [30 x i64]* @var
+; CHECK-SAVED-ALL-DAG: str x9
+; CHECK-SAVED-ALL-DAG: str x10
+; CHECK-SAVED-ALL-DAG: str x11
+; CHECK-SAVED-ALL-DAG: str x12
+; CHECK-SAVED-ALL-DAG: str x13
+; CHECK-SAVED-ALL-DAG: str x14
+; CHECK-SAVED-ALL-DAG: str x15
+; CHECK-SAVED-ALL-DAG: str x18
+
+  ret void
+}