Index: lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- lib/Target/AArch64/AArch64CallingConvention.td +++ lib/Target/AArch64/AArch64CallingConvention.td @@ -204,6 +204,44 @@ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. +// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +def CC_AArch64_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a @@ -249,3 +287,4 @@ (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), (sequence "Q%u", 0, 31))>; +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; Index: lib/Target/AArch64/AArch64FastISel.cpp =================================================================== --- lib/Target/AArch64/AArch64FastISel.cpp +++ lib/Target/AArch64/AArch64FastISel.cpp @@ -302,6 +302,8 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { if (CC == CallingConv::WebKit_JS) return CC_AArch64_WebKit_JS; + if (CC == CallingConv::GHC) + return CC_AArch64_GHC; return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; } Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -215,6 +215,11 @@ bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); @@ -451,6 +456,11 @@ int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo(); + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1990,6 +1990,8 @@ llvm_unreachable("Unsupported calling convention."); case CallingConv::WebKit_JS: return CC_AArch64_WebKit_JS; + case CallingConv::GHC: + return CC_AArch64_GHC; case CallingConv::C: case CallingConv::Fast: if (!Subtarget->isTargetDarwin()) Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -40,6 +40,10 @@ const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::GHC) + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; else @@ -48,6 +52,9 @@ const uint32_t * AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; else @@ -63,7 +70,7 @@ } const uint32_t * -AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { +AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -71,6 +78,7 @@ // // In case that the calling convention does not use the same register for // both, the function should return NULL (does not currently apply) + assert(CC != CallingConv::GHC && "should not be GHC calling convention."); return CSR_AArch64_AAPCS_ThisReturn_RegMask; } Index: test/CodeGen/AArch64/ghc-cc.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/ghc-cc.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; Check the GHC call convention works (aarch64) + +@base = external global i64 ; assigned to register: r19 +@sp = external global i64 ; assigned to register: r20 +@hp = external global i64 ; assigned to register: r21 +@r1 = external global i64 ; assigned to register: r22 +@r2 = external global i64 ; assigned to register: r23 +@r3 = external global i64 ; assigned to register: r24 +@r4 = external global i64 ; assigned to register: r25 +@r5 = external global i64 ; assigned to register: r26 +@r6 = external global i64 ; assigned to register: r27 +@splim = external global i64 ; assigned to register: r28 + +@f1 = external global float ; assigned to register: s8 +@f2 = external global float ; assigned to register: s9 +@f3 = external global float ; assigned to register: s10 +@f4 = external global float ; assigned to register: s11 + +@d1 = external global double ; assigned to register: d12 +@d2 = external global double ; assigned to register: d13 +@d3 = external global double ; assigned to register: d14 +@d4 = external global double ; assigned to register: d15 + +define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind { +entry: + ; CHECK-LABEL: addtwo + ; CHECK: add x0, x19, x20 + ; CHECK-NEXT: ret + %0 = add i64 %x, %y + ret i64 %0 +} + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK-NOT: mov {{x[0-9]+}}, sp + ; CHECK: bl addtwo + ; CHECK-NEXT: bl foo + %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) + call void @foo() nounwind + ret void +} + +define ghccc void @foo_i64 () nounwind { +entry: + ; CHECK-LABEL: foo_i64 + ; CHECK: adrp {{x[0-9]+}}, base + ; CHECK-NEXT: ldr x19, [{{x[0-9]+}}, :lo12:base] + ; CHECK-NEXT: bl bar_i64 + ; CHECK-NEXT: ret + + %0 = load i64* @base + tail call ghccc void @bar_i64( i64 %0 ) nounwind + ret void +} + +define ghccc void @foo_float () nounwind { +entry: + ; CHECK-LABEL: foo_float + ; CHECK: adrp {{x[0-9]+}}, f1 + ; CHECK-NEXT: ldr s8, [{{x[0-9]+}}, :lo12:f1] + ; CHECK-NEXT: bl bar_float + ; CHECK-NEXT: ret + + %0 = load float* @f1 + tail call ghccc void @bar_float( float %0 ) nounwind + ret void +} + +define ghccc void @foo_double () nounwind { +entry: + ; CHECK-LABEL: foo_double + ; CHECK: adrp {{x[0-9]+}}, d1 + ; CHECK-NEXT: ldr d12, [{{x[0-9]+}}, :lo12:d1] + ; CHECK-NEXT: bl bar_double + ; CHECK-NEXT: ret + + %0 = load double* @d1 + tail call ghccc void @bar_double( double %0 ) nounwind + ret void +} + +declare ghccc void @foo () + +declare ghccc void @bar_i64 (i64) +declare ghccc void @bar_float (float) +declare ghccc void @bar_double (double)