Skip to content

Commit 9683ecb

Browse files
author
Oren Ben Simhon
committed Dec 11, 2016
[X86] Regcall - Adding support for mask types
The RegCall calling convention passes mask-type arguments in x86 GPR registers. This commit adds the changes required to support v32i1, v16i1 and v8i1. Differential Revision: https://reviews.llvm.org/D27148 llvm-svn: 289383
1 parent 726774c commit 9683ecb

File tree

4 files changed

+225
-46
lines changed

4 files changed

+225
-46
lines changed
 

‎llvm/lib/Target/X86/X86CallingConv.h

-8
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
4141
return false; // Continue the search, but now for i32.
4242
}
4343

44-
4544
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
4645
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
4746
CCState &) {
@@ -51,13 +50,6 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
5150
return false;
5251
}
5352

54-
inline bool CC_X86_RegCall_Error(unsigned &, MVT &, MVT &,
55-
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
56-
CCState &) {
57-
report_fatal_error("LLVM x86 RegCall calling convention implementation" \
58-
" doesn't support long double and mask types yet.");
59-
}
60-
6153
inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
6254
MVT &LocVT,
6355
CCValAssign::LocInfo &LocInfo,

‎llvm/lib/Target/X86/X86CallingConv.td

+11-8
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@ def CC_#NAME : CallingConv<[
7676
// Promote i1/i8/i16 arguments to i32.
7777
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
7878

79+
// Promote v8i1/v16i1/v32i1 arguments to i32.
80+
CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType<i32>>,
81+
7982
// bool, char, int, enum, long, pointer --> GPR
8083
CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
8184

@@ -89,9 +92,6 @@ def CC_#NAME : CallingConv<[
8992
CCIfSubtarget<"is32Bit()", CCIfType<[i64],
9093
CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
9194

92-
// TODO: Handle the case of mask types (v*i1)
93-
CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
94-
9595
// float, double, float128 --> XMM
9696
// In the case of SSE disabled --> save to stack
9797
CCIfType<[f32, f64, f128],
@@ -146,8 +146,14 @@ def CC_#NAME : CallingConv<[
146146
]>;
147147

148148
def RetCC_#NAME : CallingConv<[
149-
// Promote i1 arguments to i8.
150-
CCIfType<[i1], CCPromoteToType<i8>>,
149+
// Promote i1, v8i1 arguments to i8.
150+
CCIfType<[i1, v8i1], CCPromoteToType<i8>>,
151+
152+
// Promote v16i1 arguments to i16.
153+
CCIfType<[v16i1], CCPromoteToType<i16>>,
154+
155+
// Promote v32i1 arguments to i32.
156+
CCIfType<[v32i1], CCPromoteToType<i32>>,
151157

152158
// bool, char, int, enum, long, pointer --> GPR
153159
CCIfType<[i8], CCAssignToReg<RC.GPR_8>>,
@@ -164,9 +170,6 @@ def RetCC_#NAME : CallingConv<[
164170
CCIfSubtarget<"is32Bit()", CCIfType<[i64],
165171
CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
166172

167-
// TODO: Handle the case of mask types (v*i1)
168-
CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
169-
170173
// long double --> FP
171174
CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
172175

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+51-22
Original file line number | Diff line number | Diff line change
@@ -2100,14 +2100,26 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
21002100
}
21012101

21022102
/// Lowers mask values (v*i1) to the local register values
2103+
/// \returns DAG node after lowering to register type
21032104
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
21042105
const SDLoc &Dl, SelectionDAG &DAG) {
21052106
EVT ValVT = ValArg.getValueType();
21062107

2107-
if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) {
2108+
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2109+
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2110+
// Two stage lowering might be required
2111+
// bitcast: v8i1 -> i8 / v16i1 -> i16
2112+
// anyextend: i8 -> i32 / i16 -> i32
2113+
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2114+
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2115+
if (ValLoc == MVT::i32)
2116+
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2117+
return ValToCopy;
2118+
} else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2119+
(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
21082120
// One stage lowering is required
2109-
// bitcast: v64i1 -> i64
2110-
return DAG.getBitcast(MVT::i64, ValArg);
2121+
// bitcast: v32i1 -> i32 / v64i1 -> i64
2122+
return DAG.getBitcast(ValLoc, ValArg);
21112123
} else
21122124
return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
21132125
}
@@ -2379,14 +2391,14 @@ EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
23792391
}
23802392

23812393
/// Reads two 32 bit registers and creates a 64 bit mask value.
2382-
/// @param VA The current 32 bit value that need to be assigned.
2383-
/// @param NextVA The next 32 bit value that need to be assigned.
2384-
/// @param Root The parent DAG node.
2385-
/// @param [in,out] InFlag Represents SDvalue in the parent DAG node for
2394+
/// \param VA The current 32 bit value that needs to be assigned.
2395+
/// \param NextVA The next 32 bit value that needs to be assigned.
2396+
/// \param Root The parent DAG node.
2397+
/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
23862398
/// glue purposes. In the case the DAG is already using
23872399
/// physical register instead of virtual, we should glue
23882400
/// our new SDValue to InFlag SDvalue.
2389-
/// @return a new SDvalue of size 64bit.
2401+
/// \return a new SDvalue of size 64bit.
23902402
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
23912403
SDValue &Root, SelectionDAG &DAG,
23922404
const SDLoc &Dl, const X86Subtarget &Subtarget,
@@ -2436,23 +2448,38 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
24362448
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
24372449
}
24382450

2451+
/// The function will lower a register of various sizes (8/16/32/64)
2452+
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2453+
/// \returns a DAG node that contains the operand after lowering to mask type.
24392454
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
24402455
const EVT &ValLoc, const SDLoc &Dl,
24412456
SelectionDAG &DAG) {
2442-
assert((ValLoc == MVT::i64 || ValLoc == MVT::i32) &&
2443-
"Expecting register location of size 32/64 bit");
2457+
SDValue ValReturned = ValArg;
24442458

2445-
// Currently not referenced - will be used in other mask lowering
2446-
(void)Dl;
2459+
if (ValVT == MVT::v64i1) {
2460+
// On a 32 bit machine, this case is handled by getv64i1Argument
2461+
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
2462+
// On a 64 bit machine, there is no need to truncate the value; only a bitcast is required
2463+
} else {
2464+
MVT maskLen;
2465+
switch (ValVT.getSimpleVT().SimpleTy) {
2466+
case MVT::v8i1:
2467+
maskLen = MVT::i8;
2468+
break;
2469+
case MVT::v16i1:
2470+
maskLen = MVT::i16;
2471+
break;
2472+
case MVT::v32i1:
2473+
maskLen = MVT::i32;
2474+
break;
2475+
default:
2476+
llvm_unreachable("Expecting a vector of i1 types");
2477+
}
24472478

2448-
// In the case of v64i1 no special handling is required due to two reasons:
2449-
// In 32 bit machine, this case is handled by getv64i1Argument
2450-
// In 64 bit machine, There is no need to truncate the value only bitcast
2451-
if (ValVT == MVT::v64i1 && ValLoc == MVT::i32) {
2452-
llvm_unreachable("Expecting only i64 locations");
2479+
ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
24532480
}
24542481

2455-
return DAG.getBitcast(ValVT, ValArg);
2482+
return DAG.getBitcast(ValVT, ValReturned);
24562483
}
24572484

24582485
/// Lower the result values of a call into the
@@ -2513,8 +2540,9 @@ SDValue X86TargetLowering::LowerCallResult(
25132540

25142541
if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
25152542
if (VA.getValVT().isVector() &&
2516-
(VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64)) {
2517-
// promoting a mask type (v*i1) into a register of type i64/i32
2543+
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2544+
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2545+
// promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
25182546
Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
25192547
} else
25202548
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
@@ -2867,8 +2895,9 @@ SDValue X86TargetLowering::LowerFormalArguments(
28672895
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
28682896
else if (VA.getValVT().isVector() &&
28692897
VA.getValVT().getScalarType() == MVT::i1 &&
2870-
((RegVT == MVT::i32) || (RegVT == MVT::i64))) {
2871-
// Promoting a mask type (v*i1) into a register of type i64/i32
2898+
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2899+
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2900+
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
28722901
ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
28732902
} else
28742903
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);

‎llvm/test/CodeGen/X86/avx512-regcall-Mask.ll

+163-8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck --check-prefix=X32 %s
2-
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck --check-prefix=WIN64 %s
3-
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck --check-prefix=LINUXOSX64 %s
1+
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=X32 %s
2+
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=CHECK64 --check-prefix=WIN64 %s
3+
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=CHECK64 --check-prefix=LINUXOSX64 %s
44

55
; X32-LABEL: test_argv64i1:
66
; X32: kmovd %edx, %k0
@@ -155,7 +155,7 @@ define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1>
155155
; LINUXOSX64: call{{.*}} test_argv64i1
156156

157157
; Test regcall when passing arguments of v64i1 type
158-
define x86_regcallcc i64 @caller_argv64i1() #0 {
158+
define i64 @caller_argv64i1() #0 {
159159
entry:
160160
%v0 = bitcast i64 4294967298 to <64 x i1>
161161
%call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
@@ -171,9 +171,9 @@ entry:
171171
; X32: mov{{.*}} $1, %ecx
172172
; X32: ret{{.*}}
173173

174-
; WIN64-LABEL: test_retv64i1:
175-
; WIN64: mov{{.*}} $4294967298, %rax
176-
; WIN64: ret{{.*}}
174+
; CHECK64-LABEL: test_retv64i1:
175+
; CHECK64: mov{{.*}} $4294967298, %rax
176+
; CHECK64: ret{{.*}}
177177

178178
; Test regcall when returning v64i1 type
179179
define x86_regcallcc <64 x i1> @test_retv64i1() {
@@ -187,9 +187,164 @@ define x86_regcallcc <64 x i1> @test_retv64i1() {
187187
; X32: kmov{{.*}} %ecx, %k1
188188
; X32: kunpckdq %k0, %k1, %k0
189189

190+
; CHECK64-LABEL: caller_retv64i1:
191+
; CHECK64: call{{.*}} {{_*}}test_retv64i1
192+
; CHECK64: kmovq %rax, %k0
193+
; CHECK64: ret{{.*}}
194+
190195
; Test regcall when processing result of v64i1 type
191-
define x86_regcallcc <64 x i1> @caller_retv64i1() #0 {
196+
define <64 x i1> @caller_retv64i1() #0 {
192197
entry:
193198
%call = call x86_regcallcc <64 x i1> @test_retv64i1()
194199
ret <64 x i1> %call
195200
}
201+
202+
; CHECK-LABEL: test_argv32i1:
203+
; CHECK: kmovd %edx, %k{{[0-9]+}}
204+
; CHECK: kmovd %ecx, %k{{[0-9]+}}
205+
; CHECK: kmovd %eax, %k{{[0-9]+}}
206+
; CHECK: ret{{l|q}}
207+
208+
; Test regcall when receiving arguments of v32i1 type
209+
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
210+
define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
211+
entry:
212+
%res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
213+
ret i32 %res
214+
}
215+
216+
; CHECK-LABEL: caller_argv32i1:
217+
; CHECK: mov{{.*}} $1, %eax
218+
; CHECK: mov{{.*}} $1, %ecx
219+
; CHECK: mov{{.*}} $1, %edx
220+
; CHECK: call{{.*}} {{_*}}test_argv32i1
221+
222+
; Test regcall when passing arguments of v32i1 type
223+
define i32 @caller_argv32i1() #0 {
224+
entry:
225+
%v0 = bitcast i32 1 to <32 x i1>
226+
%call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
227+
ret i32 %call
228+
}
229+
230+
; CHECK-LABEL: test_retv32i1:
231+
; CHECK: movl $1, %eax
232+
; CHECK: ret{{l|q}}
233+
234+
; Test regcall when returning v32i1 type
235+
define x86_regcallcc <32 x i1> @test_retv32i1() {
236+
%a = bitcast i32 1 to <32 x i1>
237+
ret <32 x i1> %a
238+
}
239+
240+
; CHECK-LABEL: caller_retv32i1:
241+
; CHECK: call{{.*}} {{_*}}test_retv32i1
242+
; CHECK: incl %eax
243+
244+
; Test regcall when processing result of v32i1 type
245+
define i32 @caller_retv32i1() #0 {
246+
entry:
247+
%call = call x86_regcallcc <32 x i1> @test_retv32i1()
248+
%c = bitcast <32 x i1> %call to i32
249+
%add = add i32 %c, 1
250+
ret i32 %add
251+
}
252+
253+
; CHECK-LABEL: test_argv16i1:
254+
; CHECK: kmovw %edx, %k{{[0-9]+}}
255+
; CHECK: kmovw %ecx, %k{{[0-9]+}}
256+
; CHECK: kmovw %eax, %k{{[0-9]+}}
257+
; CHECK: ret{{l|q}}
258+
259+
; Test regcall when receiving arguments of v16i1 type
260+
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
261+
define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
262+
%res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
263+
ret i16 %res
264+
}
265+
266+
; CHECK-LABEL: caller_argv16i1:
267+
; CHECK: movl $1, %eax
268+
; CHECK: movl $1, %ecx
269+
; CHECK: movl $1, %edx
270+
; CHECK: call{{l|q}} {{_*}}test_argv16i1
271+
272+
; Test regcall when passing arguments of v16i1 type
273+
define i16 @caller_argv16i1() #0 {
274+
entry:
275+
%v0 = bitcast i16 1 to <16 x i1>
276+
%call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
277+
ret i16 %call
278+
}
279+
280+
; CHECK-LABEL: test_retv16i1:
281+
; CHECK: movw $1, %ax
282+
; CHECK: ret{{l|q}}
283+
284+
; Test regcall when returning v16i1 type
285+
define x86_regcallcc <16 x i1> @test_retv16i1() {
286+
%a = bitcast i16 1 to <16 x i1>
287+
ret <16 x i1> %a
288+
}
289+
290+
; CHECK-LABEL: caller_retv16i1:
291+
; CHECK: call{{l|q}} {{_*}}test_retv16i1
292+
; CHECK: incl %eax
293+
294+
; Test regcall when processing result of v16i1 type
295+
define i16 @caller_retv16i1() #0 {
296+
entry:
297+
%call = call x86_regcallcc <16 x i1> @test_retv16i1()
298+
%c = bitcast <16 x i1> %call to i16
299+
%add = add i16 %c, 1
300+
ret i16 %add
301+
}
302+
303+
; CHECK-LABEL: test_argv8i1:
304+
; CHECK: kmovw %edx, %k{{[0-9]+}}
305+
; CHECK: kmovw %ecx, %k{{[0-9]+}}
306+
; CHECK: kmovw %eax, %k{{[0-9]+}}
307+
; CHECK: ret{{l|q}}
308+
309+
; Test regcall when receiving arguments of v8i1 type
310+
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
311+
define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
312+
%res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
313+
ret i8 %res
314+
}
315+
316+
; CHECK-LABEL: caller_argv8i1:
317+
; CHECK: movl $1, %eax
318+
; CHECK: movl $1, %ecx
319+
; CHECK: movl $1, %edx
320+
; CHECK: call{{l|q}} {{_*}}test_argv8i1
321+
322+
; Test regcall when passing arguments of v8i1 type
323+
define i8 @caller_argv8i1() #0 {
324+
entry:
325+
%v0 = bitcast i8 1 to <8 x i1>
326+
%call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
327+
ret i8 %call
328+
}
329+
330+
; CHECK-LABEL: test_retv8i1:
331+
; CHECK: movb $1, %al
332+
; CHECK: ret{{q|l}}
333+
334+
; Test regcall when returning v8i1 type
335+
define x86_regcallcc <8 x i1> @test_retv8i1() {
336+
%a = bitcast i8 1 to <8 x i1>
337+
ret <8 x i1> %a
338+
}
339+
340+
; CHECK-LABEL: caller_retv8i1:
341+
; CHECK: call{{l|q}} {{_*}}test_retv8i1
342+
; CHECK: kmovw %eax, %k{{[0-9]+}}
343+
; CHECK: ret{{l|q}}
344+
345+
; Test regcall when processing result of v8i1 type
346+
define <8 x i1> @caller_retv8i1() #0 {
347+
entry:
348+
%call = call x86_regcallcc <8 x i1> @test_retv8i1()
349+
ret <8 x i1> %call
350+
}

0 commit comments

Comments
 (0)
Please sign in to comment.