diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2977,7 +2977,7 @@ def fzero_call_used_regs_EQ : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Flags<[CC1Option]>, - HelpText<"Clear call-used registers upon function return.">, + HelpText<"Clear call-used registers upon function return (AArch64/x86 only)">, Values<"skip,used-gpr-arg,used-gpr,used-arg,used,all-gpr-arg,all-gpr,all-arg,all">, NormalizedValues<["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", "AllGPRArg", "AllGPR", "AllArg", "All"]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5987,7 +5987,7 @@ // FIXME: There's no reason for this to be restricted to X86. The backend // code needs to be changed to include the appropriate function calls // automatically. - if (!Triple.isX86()) + if (!Triple.isX86() && !Triple.isAArch64()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -153,6 +153,10 @@ MachineBasicBlock::iterator MBBI) const; void emitCalleeSavedSVERestores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; + + /// Emit target zero call-used regs. 
+  void emitZeroCallUsedRegs(BitVector RegsToZero,
+                            MachineBasicBlock &MBB) const override;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -675,6 +675,152 @@
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+/// Map \p Reg to the register that has to be cleared in order to wipe it, or
+/// to the null register (0) if \p Reg is not handled here.
+///
+/// W0-W18 and X0-X18 map to the matching X register.  B, H, S, D and Q
+/// registers 0-31 map to the matching Z register when \p HasSVE is set and to
+/// the matching Q register otherwise.  Every other register maps to 0.
+static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
+  switch (Reg.id()) {
+  default:
+    // The called routine is expected to preserve r19-r28
+    // r29 and r30 are used as frame pointer and link register resp.
+    return 0;
+
+    // GPRs
+#define CASE(n) \
+  case AArch64::W##n: \
+  case AArch64::X##n: \
+    return AArch64::X##n
+  CASE(0);
+  CASE(1);
+  CASE(2);
+  CASE(3);
+  CASE(4);
+  CASE(5);
+  CASE(6);
+  CASE(7);
+  CASE(8);
+  CASE(9);
+  CASE(10);
+  CASE(11);
+  CASE(12);
+  CASE(13);
+  CASE(14);
+  CASE(15);
+  CASE(16);
+  CASE(17);
+  CASE(18);
+#undef CASE
+
+    // FPRs
+#define CASE(n) \
+  case AArch64::B##n: \
+  case AArch64::H##n: \
+  case AArch64::S##n: \
+  case AArch64::D##n: \
+  case AArch64::Q##n: \
+    return HasSVE ? AArch64::Z##n : AArch64::Q##n
+  CASE(0);
+  CASE(1);
+  CASE(2);
+  CASE(3);
+  CASE(4);
+  CASE(5);
+  CASE(6);
+  CASE(7);
+  CASE(8);
+  CASE(9);
+  CASE(10);
+  CASE(11);
+  CASE(12);
+  CASE(13);
+  CASE(14);
+  CASE(15);
+  CASE(16);
+  CASE(17);
+  CASE(18);
+  CASE(19);
+  CASE(20);
+  CASE(21);
+  CASE(22);
+  CASE(23);
+  CASE(24);
+  CASE(25);
+  CASE(26);
+  CASE(27);
+  CASE(28);
+  CASE(29);
+  CASE(30);
+  CASE(31);
+#undef CASE
+  }
+}
+
+/// Zero the registers selected in \p RegsToZero by inserting instructions
+/// before the first terminator of \p MBB: MOVi64imm #0 for each GPR, MOVID #0
+/// for each FP/vector register and, when the subtarget has SVE, PFALSE for
+/// each selected predicate register P0-P15.
+void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
+                                                MachineBasicBlock &MBB) const {
+  // Insertion point.
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+
+  // Fake a debug loc.
+  DebugLoc DL;
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  const MachineFunction &MF = *MBB.getParent();
+  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+  const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
+
+  BitVector GPRsToZero(TRI.getNumRegs());
+  BitVector FPRsToZero(TRI.getNumRegs());
+  bool HasSVE = STI.hasSVE();
+  for (MCRegister Reg : RegsToZero.set_bits()) {
+    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
+      // For GPRs, we only care to clear out the 64-bit register.
+      if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
+        GPRsToZero.set(XReg);
+    } else if (AArch64::FPR128RegClass.contains(Reg) ||
+               AArch64::FPR64RegClass.contains(Reg) ||
+               AArch64::FPR32RegClass.contains(Reg) ||
+               AArch64::FPR16RegClass.contains(Reg) ||
+               AArch64::FPR8RegClass.contains(Reg)) {
+      // For FPRs, clear the widest register that covers Reg: the Z register
+      // when SVE is available, the Q register otherwise.
+      if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
+        FPRsToZero.set(XReg);
+    }
+  }
+
+  const AArch64InstrInfo &TII = *STI.getInstrInfo();
+
+  // Zero out GPRs.
+  for (MCRegister Reg : GPRsToZero.set_bits())
+    BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0);
+
+  // Zero out FP/vector registers.
+  // NOTE(review): FPRsToZero holds Q or Z registers, yet MOVID is the opcode
+  // emitted for both -- confirm it accepts those register classes.
+  for (MCRegister Reg : FPRsToZero.set_bits())
+    BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVID), Reg).addImm(0);
+
+  // Zero out the SVE predicate registers that were requested.
+  if (HasSVE) {
+    for (MCRegister PReg :
+         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
+          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
+          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
+          AArch64::P15}) {
+      if (RegsToZero[PReg])
+        BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
+    }
+  }
+}
+
 // Find a scratch register that we can use at the start of the prologue to
 // re-align the stack pointer.
We avoid using callee-save registers since they
 // may appear to be free when this is called from canUseAsPrologue (during
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -120,6 +120,9 @@
   bool hasBasePointer(const MachineFunction &MF) const;
   unsigned getBaseRegister() const;
 
+  bool isArgumentRegister(const MachineFunction &MF,
+                          MCRegister Reg) const override;
+
   // Debug information queries.
   Register getFrameRegister(const MachineFunction &MF) const override;
 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -33,6 +33,8 @@
 
 using namespace llvm;
 
+#define GET_CC_REGISTER_LISTS
+#include "AArch64GenCallingConv.inc"
 #define GET_REGINFO_TARGET_DESC
 #include "AArch64GenRegisterInfo.inc"
 
@@ -418,6 +420,74 @@
   return false;
 }
 
+/// Return true if \p Reg is in the TableGen-generated argument register
+/// list(s) for the calling convention of \p MF's function, taking the target
+/// OS (Windows, Darwin) and ILP32 into account.  Calling conventions that the
+/// switch does not list are reported as a fatal error.
+bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
+                                             MCRegister Reg) const {
+  CallingConv::ID CC = MF.getFunction().getCallingConv();
+  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+  // NOTE(review): despite its name this holds the result of
+  // isCallingConvWin64(); Function::isVarArg() is never consulted -- confirm.
+  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
+
+  auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
+    return llvm::any_of(RegList,
+                        [Reg](const MCRegister R) { return R == Reg; });
+  };
+
+  switch (CC) {
+  default:
+    report_fatal_error("Unsupported calling convention.");
+  case CallingConv::WebKit_JS:
+    return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg);
+  case CallingConv::GHC:
+    return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
+  case CallingConv::C:
+  case CallingConv::Fast:
+  case CallingConv::PreserveMost:
+  case CallingConv::CXX_FAST_TLS:
+  case CallingConv::Swift:
+  case CallingConv::SwiftTail:
+  case CallingConv::Tail:
+    if (STI.isTargetWindows() && IsVarArg)
+      return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+    if (!STI.isTargetDarwin()) {
+      switch (CC) {
+      default:
+        return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+      case CallingConv::Swift:
+      case CallingConv::SwiftTail:
+        return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
+               HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
+      }
+    }
+    if (!IsVarArg) {
+      switch (CC) {
+      default:
+        return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
+      case CallingConv::Swift:
+      case CallingConv::SwiftTail:
+        return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
+               HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
+      }
+    }
+    if (STI.isTargetILP32())
+      return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
+    return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
+  case CallingConv::Win64:
+    if (IsVarArg)
+      return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+    return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+  case CallingConv::CFGuard_Check:
+    return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
+  case CallingConv::AArch64_VectorCall:
+  case CallingConv::AArch64_SVE_VectorCall:
+    return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+  }
+}
+
 Register
 AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const AArch64FrameLowering *TFI = getFrameLowering(MF);
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1385,3 +1385,12 @@
     return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
   }];
 }
+
+//===----------------------------------------------------------------------===//
+// Register categories.
+// + +def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>; + +def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>; +def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -656,7 +656,7 @@ [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) return true; - return false; + return X86GenRegisterInfo::isArgumentRegister(MF, Reg); } bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -0,0 +1,809 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=DEFAULT +; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefix=SVE + +@result = dso_local global i32 0, align 4 + +define dso_local i32 @skip(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="skip" { +; DEFAULT-LABEL: skip: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: skip: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr-arg" { +; DEFAULT-LABEL: used_gpr_arg: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: ret 
+; +; SVE-LABEL: used_gpr_arg: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr" { +; DEFAULT-LABEL: used_gpr: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_gpr: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-arg" { +; DEFAULT-LABEL: used_arg: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_arg: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @used(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used" { +; DEFAULT-LABEL: used: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: 
orr w0, w8, w2 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr-arg" { +; DEFAULT-LABEL: all_gpr_arg: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_gpr_arg: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all_gpr(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr" { +; DEFAULT-LABEL: all_gpr: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_gpr: +; SVE: // %bb.0: // 
%entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; SVE-NEXT: mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all-arg" { +; DEFAULT-LABEL: all_arg: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q0, #0000000000000000 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_arg: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z0, #0000000000000000 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, 
#0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 "zero-call-used-regs"="all" { +; DEFAULT-LABEL: all: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mul w8, w1, w0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q0, #0000000000000000 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi q8, #0000000000000000 +; DEFAULT-NEXT: movi q9, #0000000000000000 +; DEFAULT-NEXT: movi q10, #0000000000000000 +; DEFAULT-NEXT: movi q11, #0000000000000000 +; DEFAULT-NEXT: movi q12, #0000000000000000 +; DEFAULT-NEXT: movi q13, #0000000000000000 +; DEFAULT-NEXT: movi q14, #0000000000000000 +; DEFAULT-NEXT: movi q15, #0000000000000000 +; DEFAULT-NEXT: movi q16, #0000000000000000 +; DEFAULT-NEXT: movi q17, #0000000000000000 +; DEFAULT-NEXT: movi q18, #0000000000000000 +; DEFAULT-NEXT: movi q19, #0000000000000000 +; 
DEFAULT-NEXT: movi q20, #0000000000000000 +; DEFAULT-NEXT: movi q21, #0000000000000000 +; DEFAULT-NEXT: movi q22, #0000000000000000 +; DEFAULT-NEXT: movi q23, #0000000000000000 +; DEFAULT-NEXT: movi q24, #0000000000000000 +; DEFAULT-NEXT: movi q25, #0000000000000000 +; DEFAULT-NEXT: movi q26, #0000000000000000 +; DEFAULT-NEXT: movi q27, #0000000000000000 +; DEFAULT-NEXT: movi q28, #0000000000000000 +; DEFAULT-NEXT: movi q29, #0000000000000000 +; DEFAULT-NEXT: movi q30, #0000000000000000 +; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mul w8, w1, w0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; SVE-NEXT: mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z0, #0000000000000000 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: movi z8, #0000000000000000 +; SVE-NEXT: movi z9, #0000000000000000 +; SVE-NEXT: movi z10, #0000000000000000 +; SVE-NEXT: movi z11, #0000000000000000 +; SVE-NEXT: movi z12, #0000000000000000 +; SVE-NEXT: movi z13, #0000000000000000 +; SVE-NEXT: movi z14, #0000000000000000 +; SVE-NEXT: movi z15, #0000000000000000 +; SVE-NEXT: movi z16, #0000000000000000 +; SVE-NEXT: movi z17, #0000000000000000 +; SVE-NEXT: movi z18, #0000000000000000 +; SVE-NEXT: movi z19, #0000000000000000 +; SVE-NEXT: movi z20, #0000000000000000 +; SVE-NEXT: movi z21, #0000000000000000 +; 
SVE-NEXT: movi z22, #0000000000000000 +; SVE-NEXT: movi z23, #0000000000000000 +; SVE-NEXT: movi z24, #0000000000000000 +; SVE-NEXT: movi z25, #0000000000000000 +; SVE-NEXT: movi z26, #0000000000000000 +; SVE-NEXT: movi z27, #0000000000000000 +; SVE-NEXT: movi z28, #0000000000000000 +; SVE-NEXT: movi z29, #0000000000000000 +; SVE-NEXT: movi z30, #0000000000000000 +; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: pfalse p4.b +; SVE-NEXT: pfalse p5.b +; SVE-NEXT: pfalse p6.b +; SVE-NEXT: pfalse p7.b +; SVE-NEXT: pfalse p8.b +; SVE-NEXT: pfalse p9.b +; SVE-NEXT: pfalse p10.b +; SVE-NEXT: pfalse p11.b +; SVE-NEXT: pfalse p12.b +; SVE-NEXT: pfalse p13.b +; SVE-NEXT: pfalse p14.b +; SVE-NEXT: pfalse p15.b +; SVE-NEXT: ret + +entry: + %mul = mul nsw i32 %b, %a + %or = or i32 %mul, %c + ret i32 %or +} + +define dso_local double @skip_float(double noundef %a, float noundef %b) local_unnamed_addr #0 "zero-call-used-regs"="skip" { +; DEFAULT-LABEL: skip_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: skip_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_gpr_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr-arg" { +; DEFAULT-LABEL: used_gpr_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_gpr_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double 
@used_gpr_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-gpr" { +; DEFAULT-LABEL: used_gpr_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_gpr_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used-arg" { +; DEFAULT-LABEL: used_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @used_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="used" { +; DEFAULT-LABEL: used_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: used_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_gpr_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-gpr-arg" { +; DEFAULT-LABEL: all_gpr_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: 
fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_gpr_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_gpr_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-gpr" { +; DEFAULT-LABEL: all_gpr_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_gpr_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; SVE-NEXT: 
mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_arg_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all-arg" { +; DEFAULT-LABEL: all_arg_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_arg_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: ret + +entry: + %conv = fpext float 
%b to double + %mul = fmul double %conv, %a + ret double %mul +} + +define dso_local double @all_float(double noundef %a, float noundef %b) local_unnamed_addr #0 noinline optnone "zero-call-used-regs"="all" { +; DEFAULT-LABEL: all_float: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: fcvt d1, s1 +; DEFAULT-NEXT: fmul d0, d1, d0 +; DEFAULT-NEXT: mov x0, #0 +; DEFAULT-NEXT: mov x1, #0 +; DEFAULT-NEXT: mov x2, #0 +; DEFAULT-NEXT: mov x3, #0 +; DEFAULT-NEXT: mov x4, #0 +; DEFAULT-NEXT: mov x5, #0 +; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: mov x7, #0 +; DEFAULT-NEXT: mov x8, #0 +; DEFAULT-NEXT: mov x9, #0 +; DEFAULT-NEXT: mov x10, #0 +; DEFAULT-NEXT: mov x11, #0 +; DEFAULT-NEXT: mov x12, #0 +; DEFAULT-NEXT: mov x13, #0 +; DEFAULT-NEXT: mov x14, #0 +; DEFAULT-NEXT: mov x15, #0 +; DEFAULT-NEXT: mov x16, #0 +; DEFAULT-NEXT: mov x17, #0 +; DEFAULT-NEXT: mov x18, #0 +; DEFAULT-NEXT: movi q1, #0000000000000000 +; DEFAULT-NEXT: movi q2, #0000000000000000 +; DEFAULT-NEXT: movi q3, #0000000000000000 +; DEFAULT-NEXT: movi q4, #0000000000000000 +; DEFAULT-NEXT: movi q5, #0000000000000000 +; DEFAULT-NEXT: movi q6, #0000000000000000 +; DEFAULT-NEXT: movi q7, #0000000000000000 +; DEFAULT-NEXT: movi q8, #0000000000000000 +; DEFAULT-NEXT: movi q9, #0000000000000000 +; DEFAULT-NEXT: movi q10, #0000000000000000 +; DEFAULT-NEXT: movi q11, #0000000000000000 +; DEFAULT-NEXT: movi q12, #0000000000000000 +; DEFAULT-NEXT: movi q13, #0000000000000000 +; DEFAULT-NEXT: movi q14, #0000000000000000 +; DEFAULT-NEXT: movi q15, #0000000000000000 +; DEFAULT-NEXT: movi q16, #0000000000000000 +; DEFAULT-NEXT: movi q17, #0000000000000000 +; DEFAULT-NEXT: movi q18, #0000000000000000 +; DEFAULT-NEXT: movi q19, #0000000000000000 +; DEFAULT-NEXT: movi q20, #0000000000000000 +; DEFAULT-NEXT: movi q21, #0000000000000000 +; DEFAULT-NEXT: movi q22, #0000000000000000 +; DEFAULT-NEXT: movi q23, #0000000000000000 +; DEFAULT-NEXT: movi q24, #0000000000000000 +; DEFAULT-NEXT: movi q25, #0000000000000000 +; 
DEFAULT-NEXT: movi q26, #0000000000000000 +; DEFAULT-NEXT: movi q27, #0000000000000000 +; DEFAULT-NEXT: movi q28, #0000000000000000 +; DEFAULT-NEXT: movi q29, #0000000000000000 +; DEFAULT-NEXT: movi q30, #0000000000000000 +; DEFAULT-NEXT: movi q31, #0000000000000000 +; DEFAULT-NEXT: ret +; +; SVE-LABEL: all_float: +; SVE: // %bb.0: // %entry +; SVE-NEXT: fcvt d1, s1 +; SVE-NEXT: fmul d0, d1, d0 +; SVE-NEXT: mov x0, #0 +; SVE-NEXT: mov x1, #0 +; SVE-NEXT: mov x2, #0 +; SVE-NEXT: mov x3, #0 +; SVE-NEXT: mov x4, #0 +; SVE-NEXT: mov x5, #0 +; SVE-NEXT: mov x6, #0 +; SVE-NEXT: mov x7, #0 +; SVE-NEXT: mov x8, #0 +; SVE-NEXT: mov x9, #0 +; SVE-NEXT: mov x10, #0 +; SVE-NEXT: mov x11, #0 +; SVE-NEXT: mov x12, #0 +; SVE-NEXT: mov x13, #0 +; SVE-NEXT: mov x14, #0 +; SVE-NEXT: mov x15, #0 +; SVE-NEXT: mov x16, #0 +; SVE-NEXT: mov x17, #0 +; SVE-NEXT: mov x18, #0 +; SVE-NEXT: movi z1, #0000000000000000 +; SVE-NEXT: movi z2, #0000000000000000 +; SVE-NEXT: movi z3, #0000000000000000 +; SVE-NEXT: movi z4, #0000000000000000 +; SVE-NEXT: movi z5, #0000000000000000 +; SVE-NEXT: movi z6, #0000000000000000 +; SVE-NEXT: movi z7, #0000000000000000 +; SVE-NEXT: movi z8, #0000000000000000 +; SVE-NEXT: movi z9, #0000000000000000 +; SVE-NEXT: movi z10, #0000000000000000 +; SVE-NEXT: movi z11, #0000000000000000 +; SVE-NEXT: movi z12, #0000000000000000 +; SVE-NEXT: movi z13, #0000000000000000 +; SVE-NEXT: movi z14, #0000000000000000 +; SVE-NEXT: movi z15, #0000000000000000 +; SVE-NEXT: movi z16, #0000000000000000 +; SVE-NEXT: movi z17, #0000000000000000 +; SVE-NEXT: movi z18, #0000000000000000 +; SVE-NEXT: movi z19, #0000000000000000 +; SVE-NEXT: movi z20, #0000000000000000 +; SVE-NEXT: movi z21, #0000000000000000 +; SVE-NEXT: movi z22, #0000000000000000 +; SVE-NEXT: movi z23, #0000000000000000 +; SVE-NEXT: movi z24, #0000000000000000 +; SVE-NEXT: movi z25, #0000000000000000 +; SVE-NEXT: movi z26, #0000000000000000 +; SVE-NEXT: movi z27, #0000000000000000 +; SVE-NEXT: movi z28, 
#0000000000000000 +; SVE-NEXT: movi z29, #0000000000000000 +; SVE-NEXT: movi z30, #0000000000000000 +; SVE-NEXT: movi z31, #0000000000000000 +; SVE-NEXT: pfalse p0.b +; SVE-NEXT: pfalse p1.b +; SVE-NEXT: pfalse p2.b +; SVE-NEXT: pfalse p3.b +; SVE-NEXT: pfalse p4.b +; SVE-NEXT: pfalse p5.b +; SVE-NEXT: pfalse p6.b +; SVE-NEXT: pfalse p7.b +; SVE-NEXT: pfalse p8.b +; SVE-NEXT: pfalse p9.b +; SVE-NEXT: pfalse p10.b +; SVE-NEXT: pfalse p11.b +; SVE-NEXT: pfalse p12.b +; SVE-NEXT: pfalse p13.b +; SVE-NEXT: pfalse p14.b +; SVE-NEXT: pfalse p15.b +; SVE-NEXT: ret + +entry: + %conv = fpext float %b to double + %mul = fmul double %conv, %a + ret double %mul +} + +; Don't emit zeroing registers in "main" function. +define dso_local i32 @main() local_unnamed_addr #0 { +; DEFAULT-LABEL: main: +; DEFAULT: // %bb.0: // %entry +; DEFAULT-NEXT: mov w0, wzr +; DEFAULT-NEXT: ret +; +; SVE-LABEL: main: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov w0, wzr +; SVE-NEXT: ret + +entry: + ret i32 0 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -20,6 +20,14 @@ namespace { class CallingConvEmitter { RecordKeeper &Records; + unsigned Counter; + std::string CurrentAction; + bool SwiftAction; + + std::map> AssignedRegsMap; + std::map> AssignedSwiftRegsMap; + std::map> DelegateToMap; + public: explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {} @@ -28,7 +36,7 @@ private: void EmitCallingConv(Record *CC, raw_ostream &O); void EmitAction(Record *Action, unsigned Indent, raw_ostream &O); - unsigned Counter; + void EmitArgRegisterLists(raw_ostream &O); }; } // End 
anonymous namespace @@ -38,6 +46,7 @@ // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. Records.startTimer("Emit prototypes"); + O << "#ifndef GET_CC_REGISTER_LISTS\n\n"; for (Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { unsigned Pad = CC->getName().size(); @@ -58,18 +67,25 @@ // Emit each non-custom calling convention description in full. Records.startTimer("Emit full descriptions"); for (Record *CC : CCs) { - if (!CC->getValueAsBit("Custom")) + if (!CC->getValueAsBit("Custom")) { + // Call upon the creation of a map entry from the void! + CurrentAction = CC->getName().str(); + (void)AssignedRegsMap[CurrentAction]; EmitCallingConv(CC, O); + } } -} + EmitArgRegisterLists(O); + + O << "\n#endif // CC_REGISTER_LIST\n"; +} void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) { ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; O << "\n\n"; - unsigned Pad = CC->getName().size(); + unsigned Pad = CurrentAction.size(); if (CC->getValueAsBit("Entry")) { O << "bool llvm::"; Pad += 12; @@ -77,13 +93,21 @@ O << "static bool "; Pad += 13; } - O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" + O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n" << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. 
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { + Record *Action = CCActions->getElementAsRecord(i); + SwiftAction = llvm::any_of(Action->getSuperClasses(), + [](const std::pair &Class) { + std::string Name = + Class.first->getNameInitAsString(); + return StringRef(Name).startswith("CCIfSwift"); + }); + O << "\n"; - EmitAction(CCActions->getElementAsRecord(i), 2, O); + EmitAction(Action, 2, O); } O << "\n return true; // CC didn't match.\n"; @@ -93,7 +117,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent, raw_ostream &O) { std::string IndentStr = std::string(Indent, ' '); - + if (Action->isSubClassOf("CCPredicateAction")) { O << IndentStr << "if ("; @@ -121,18 +145,30 @@ O << IndentStr << "if (!" << CC->getName() << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; + DelegateToMap[CurrentAction].insert(CC->getName().str()); } else if (Action->isSubClassOf("CCAssignToReg")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { - O << IndentStr << "if (unsigned Reg = State.AllocateReg("; - O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n"; + std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); + O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name + << ")) {\n"; + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); } else { O << IndentStr << "static const MCPhysReg RegList" << ++Counter << "[] = {\n"; O << IndentStr << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) - O << LS << getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned i = 0, e = RegList->size(); i != e; ++i) { + std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); + if (SwiftAction) + AssignedSwiftRegsMap[CurrentAction].insert(Name); + else + AssignedRegsMap[CurrentAction].insert(Name); + O << LS << Name; + } O << "\n" << 
IndentStr << "};\n"; O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList" << Counter << ")) {\n"; @@ -287,6 +323,65 @@ } } +void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { + using EntryTy = std::pair>; + + // Transitively merge all delegated CCs into AssignedRegsMap. +restart: + for (EntryTy Entry : DelegateToMap) { + if (!Entry.second.empty()) + continue; + + for (EntryTy Entry2 : DelegateToMap) { + if (Entry2.second.find(Entry.first) != Entry2.second.end()) { + AssignedRegsMap[Entry2.first].insert( + AssignedRegsMap[Entry.first].begin(), + AssignedRegsMap[Entry.first].end()); + Entry2.second.erase(Entry.first); + } + } + + DelegateToMap.erase(Entry.first); + goto restart; + } + + if (AssignedRegsMap.empty()) + return; + + O << "\n#else\n\n"; + + for (EntryTy Entry : AssignedRegsMap) { + if (Entry.first.empty()) + continue; + + O << "const MCRegister " << Entry.first << "_ArgRegs[] = { "; + + if (Entry.second.empty()) { + O << "0"; + } else { + ListSeparator LS; + for (const std::string &Reg : Entry.second) + O << LS << Reg; + } + + O << " };\n"; + } + + if (AssignedSwiftRegsMap.empty()) + return; + + O << "\n// Registers used by Swift.\n"; + for (EntryTy Entry : AssignedSwiftRegsMap) { + O << "const MCRegister " << Entry.first << "_Swift_ArgRegs[] = { "; + + ListSeparator LS; + for (const std::string &Reg : Entry.second) + O << LS << Reg; + + O << " };\n"; + } +} + namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1188,6 +1188,8 @@ << "MCRegister) const override;\n" << " bool isFixedRegister(const MachineFunction &, " << "MCRegister) const override;\n" + << " bool isArgumentRegister(const MachineFunction &, " + << "MCRegister) const override;\n" << " /// Devirtualized TargetFrameLowering.\n" << " static 
const " << TargetName << "FrameLowering *getFrameLowering(\n" << " const MachineFunction &MF);\n" @@ -1662,6 +1664,20 @@ OS << " false;\n"; OS << "}\n\n"; + OS << "bool " << ClassName << "::\n" + << "isArgumentRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "ArgumentRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + OS << "ArrayRef " << ClassName << "::getRegMaskNames() const {\n"; if (!CSRSets.empty()) {