
Commit 92ccbf2

Author: Oren Ben Simhon
Committed: Oct 13, 2016
Parent: bee9dea

[X86] Basic additions to support RegCall Calling Convention.

The Register Calling Convention (RegCall) was introduced by Intel to optimize parameter passing on function calls. It ensures that as many values as possible are passed or returned in registers. This commit adds the basic LLVM CodeGen support for RegCall on X86.

Differential Revision: http://reviews.llvm.org/D25022
llvm-svn: 284108
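For orientation, here is a minimal round-trip sketch (not part of the commit; the @add function and the standalone driver are hypothetical) of what the front-end pieces of this patch enable: LLLexer/LLParser accept the new x86_regcallcc keyword, the function records CallingConv::X86_RegCall, and AsmWriter prints the keyword back.

// Hedged sketch: assumes an LLVM tree that includes this commit and links
// against LLVMAsmParser and LLVMCore.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <memory>

int main() {
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  // Hypothetical IR module; the x86_regcallcc keyword is what this patch adds.
  std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(
      "define x86_regcallcc i32 @add(i32 %a, i32 %b) {\n"
      "  %s = add i32 %a, %b\n"
      "  ret i32 %s\n"
      "}\n",
      Err, Ctx);
  if (!M)
    return 1; // Without this patch the lexer rejects the keyword.
  // LLParser maps the keyword to the new enumerator CallingConv::X86_RegCall.
  assert(M->getFunction("add")->getCallingConv() == llvm::CallingConv::X86_RegCall);
  // AsmWriter now knows how to print the convention back as "x86_regcallcc".
  M->print(llvm::outs(), nullptr);
  return 0;
}

The X86CallingConv.td and X86RegisterInfo.cpp changes below then decide which registers actually carry RegCall arguments and return values, and which registers are callee-saved.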

10 files changed (+1044 −0 lines)
 

llvm/include/llvm/IR/CallingConv.h

+3 lines

@@ -193,6 +193,9 @@ namespace CallingConv {
     /// Calling convention for AMDGPU code object kernels.
     AMDGPU_KERNEL = 91,
 
+    /// Register calling convention used for parameters transfer optimization
+    X86_RegCall = 92,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };

llvm/lib/AsmParser/LLLexer.cpp

+1 line

@@ -585,6 +585,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(intel_ocl_bicc);
   KEYWORD(x86_64_sysvcc);
   KEYWORD(x86_64_win64cc);
+  KEYWORD(x86_regcallcc);
   KEYWORD(webkit_jscc);
   KEYWORD(swiftcc);
   KEYWORD(anyregcc);

llvm/lib/AsmParser/LLParser.cpp

+1 line

@@ -1695,6 +1695,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
   case lltok::kw_coldcc:          CC = CallingConv::Cold; break;
   case lltok::kw_x86_stdcallcc:   CC = CallingConv::X86_StdCall; break;
   case lltok::kw_x86_fastcallcc:  CC = CallingConv::X86_FastCall; break;
+  case lltok::kw_x86_regcallcc:   CC = CallingConv::X86_RegCall; break;
   case lltok::kw_x86_thiscallcc:  CC = CallingConv::X86_ThisCall; break;
   case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
   case lltok::kw_arm_apcscc:      CC = CallingConv::ARM_APCS; break;

llvm/lib/AsmParser/LLToken.h

+1 line

@@ -127,6 +127,7 @@ enum Kind {
   kw_x86_fastcallcc,
   kw_x86_thiscallcc,
   kw_x86_vectorcallcc,
+  kw_x86_regcallcc,
   kw_arm_apcscc,
   kw_arm_aapcscc,
   kw_arm_aapcs_vfpcc,

llvm/lib/IR/AsmWriter.cpp

+1 line

@@ -311,6 +311,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
   case CallingConv::X86_ThisCall:  Out << "x86_thiscallcc"; break;
+  case CallingConv::X86_RegCall:   Out << "x86_regcallcc"; break;
   case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
   case CallingConv::Intel_OCL_BI:  Out << "intel_ocl_bicc"; break;
   case CallingConv::ARM_APCS:      Out << "arm_apcscc"; break;

llvm/lib/Target/X86/X86CallingConv.h

+7 lines

@@ -43,6 +43,13 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
   return false;
 }
 
+inline bool CC_X86_RegCall_Error(unsigned &, MVT &, MVT &,
+                                 CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                 CCState &) {
+  report_fatal_error("LLVM x86 RegCall calling convention implementation" \
+                     " doesn't support long double and mask types yet.");
+}
+
 inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
                                MVT &LocVT,
                                CCValAssign::LocInfo &LocInfo,

llvm/lib/Target/X86/X86CallingConv.td

+190 lines

@@ -18,6 +18,164 @@ class CCIfSubtarget<string F, CCAction A>
                       "(State.getMachineFunction().getSubtarget()).", F),
            A>;
 
+// Register classes for RegCall
+class RC_X86_RegCall {
+  list<Register> GPR_8 = [];
+  list<Register> GPR_16 = [];
+  list<Register> GPR_32 = [];
+  list<Register> GPR_64 = [];
+  list<Register> XMM = [];
+  list<Register> YMM = [];
+  list<Register> ZMM = [];
+}
+
+// RegCall register classes for 32 bits
+def RC_X86_32_RegCall : RC_X86_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL];
+  let GPR_16 = [AX, CX, DX, DI, SI];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI];
+  let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+                      ///< \todo Fix AssignToReg to enable empty lists
+  let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+  let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+  let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
+class RC_X86_64_RegCall : RC_X86_RegCall {
+  let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
+  let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+             YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15];
+  let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7,
+             ZMM8, ZMM9, ZMM10, ZMM11, ZMM12, ZMM13, ZMM14, ZMM15];
+}
+
+def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R10B, R11B, R12B, R14B, R15B];
+  let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R10W, R11W, R12W, R14W, R15W];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R10D, R11D, R12D, R14D, R15D];
+  let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
+}
+
+def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
+  let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
+  let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
+  let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R12D, R13D, R14D, R15D];
+  let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R12, R13, R14, R15];
+}
+
+// X86-64 Intel regcall calling convention.
+multiclass X86_RegCall_base<RC_X86_RegCall RC> {
+def CC_#NAME : CallingConv<[
+    // Handles byval parameters.
+    CCIfSubtarget<"is64Bit()", CCIfByVal<CCPassByVal<8, 8>>>,
+    CCIfByVal<CCPassByVal<4, 4>>,
+
+    // Promote i1/i8/i16 arguments to i32.
+    CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+    // bool, char, int, enum, long, pointer --> GPR
+    CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    // TODO: Handle the case of 32 bit machine with v64i1 argument
+    //       (split to 2 registers)
+    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // long long, __int64 --> GPR
+    CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+    // TODO: Handle the case of long double (f80)
+    CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // float, double, float128 --> XMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[f32, f64, f128],
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m128, __m128i, __m128d --> XMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m256, __m256i, __m256d --> YMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+      CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+    // __m512, __m512i, __m512d --> ZMM
+    // In the case of SSE disabled --> save to stack
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+      CCIfSubtarget<"hasAVX512()",CCAssignToReg<RC.ZMM>>>,
+
+    // If no register was found -> assign to stack
+
+    // In 64 bit, assign 64/32 bit values to 8 byte stack
+    CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
+      CCAssignToStack<8, 8>>>,
+
+    // In 32 bit, assign 64/32 bit values to 8/4 byte stack
+    CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+    CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+    // MMX type gets 8 byte slot in stack , while alignment depends on target
+    CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,
+    CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+    // float 128 get stack slots whose size and alignment depends
+    // on the subtarget.
+    CCIfType<[f128], CCAssignToStack<0, 0>>,
+
+    // Vectors get 16-byte stack slots that are 16-byte aligned.
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+      CCAssignToStack<16, 16>>,
+
+    // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+      CCAssignToStack<32, 32>>,
+
+    // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+    CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+]>;
+
+def RetCC_#NAME : CallingConv<[
+    // Promote i1 arguments to i8.
+    CCIfType<[i1], CCPromoteToType<i8>>,
+
+    // bool, char, int, enum, long, pointer --> GPR
+    CCIfType<[i8], CCAssignToReg<RC.GPR_8>>,
+    CCIfType<[i16], CCAssignToReg<RC.GPR_16>>,
+    CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    // TODO: Handle the case of 32 bit machine with v64i1 argument
+    //       (split to 2 registers)
+    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
+
+    // long long, __int64 --> GPR
+    CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+    // long double --> FP
+    CCIfType<[f80], CCAssignToReg<[FP0]>>,
+
+    // float, double, float128 --> XMM
+    CCIfType<[f32, f64, f128],
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m128, __m128i, __m128d --> XMM
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+      CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+    // __m256, __m256i, __m256d --> YMM
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+      CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+    // __m512, __m512i, __m512d --> ZMM
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+      CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
+]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Return Value Calling Conventions
 //===----------------------------------------------------------------------===//
@@ -237,13 +395,22 @@ def RetCC_X86_64_HHVM: CallingConv<[
                           RAX, R10, R11, R13, R14, R15]>>
 ]>;
 
+
+defm X86_32_RegCall :
+     X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_Win64_RegCall :
+     X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_SysV64_RegCall :
+     X86_RegCall_base<RC_X86_64_RegCall_SysV>;
+
 // This is the root return-value convention for the X86-32 backend.
 def RetCC_X86_32 : CallingConv<[
   // If FastCC, use RetCC_X86_32_Fast.
   CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
   // If HiPE, use RetCC_X86_32_HiPE.
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
 
   // Otherwise, use RetCC_X86_32_C.
   CCDelegateTo<RetCC_X86_32_C>
@@ -268,6 +435,11 @@ def RetCC_X86_64 : CallingConv<[
   // Handle HHVM calls.
   CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
 
+  CCIfCC<"CallingConv::X86_RegCall",
+          CCIfSubtarget<"isTargetWin64()",
+                        CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
+
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
 
@@ -817,6 +989,7 @@ def CC_X86_32 : CallingConv<[
   CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
   CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
   CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
 
   // Otherwise, drop to normal X86-32 CC
   CCDelegateTo<CC_X86_32_C>
@@ -833,6 +1006,9 @@ def CC_X86_64 : CallingConv<[
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
   CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
   CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+  CCIfCC<"CallingConv::X86_RegCall",
+    CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
+  CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
   CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
 
   // Mingw64 and native Win64 use Win64 CC
@@ -936,3 +1112,17 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
 
 // Only R12 is preserved for PHP calls in HHVM.
 def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
+
+// Register calling convention preserves few GPR and XMM8-15
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
+                                      (sequence "XMM%u", 4, 7))>;
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+                                              (sequence "R%u", 10, 15))>;
+def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
+                                        (sequence "XMM%u", 8, 15))>;
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+                                               (sequence "R%u", 12, 15))>;
+def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+                                         (sequence "XMM%u", 8, 15))>;
+

llvm/lib/Target/X86/X86RegisterInfo.cpp

+26 lines

@@ -305,6 +305,19 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   }
   case CallingConv::HHVM:
     return CSR_64_HHVM_SaveList;
+  case CallingConv::X86_RegCall:
+    if (Is64Bit) {
+      if (IsWin64) {
+        return (HasSSE ? CSR_Win64_RegCall_SaveList :
+                         CSR_Win64_RegCall_NoSSE_SaveList);
+      } else {
+        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
+                         CSR_SysV64_RegCall_NoSSE_SaveList);
+      }
+    } else {
+      return (HasSSE ? CSR_32_RegCall_SaveList :
+                       CSR_32_RegCall_NoSSE_SaveList);
+    }
   case CallingConv::Cold:
     if (Is64Bit)
       return CSR_64_MostRegs_SaveList;

@@ -406,6 +419,19 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   }
   case CallingConv::HHVM:
     return CSR_64_HHVM_RegMask;
+  case CallingConv::X86_RegCall:
+    if (Is64Bit) {
+      if (IsWin64) {
+        return (HasSSE ? CSR_Win64_RegCall_RegMask :
+                         CSR_Win64_RegCall_NoSSE_RegMask);
+      } else {
+        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
+                         CSR_SysV64_RegCall_NoSSE_RegMask);
+      }
+    } else {
+      return (HasSSE ? CSR_32_RegCall_RegMask :
+                       CSR_32_RegCall_NoSSE_RegMask);
+    }
   case CallingConv::Cold:
     if (Is64Bit)
       return CSR_64_MostRegs_RegMask;
