Commit 3eeac2d

[x86][inline-asm][llvm] accept 'v' constraint

Committed Oct 10, 2016
Parent: fe2b9b4

Commit in the name of: Coby Tayree

1. For non-AVX512 x86 targets, the 'v' constraint imitates the already implemented 'x' constraint, i.e. it allows XMM{0-15} and YMM{0-15}, depending on the target arch and mode (32/64-bit).
2. For the AVX512 arch it allows [X,Y,Z]MM{0-31} (mode dependent).

This patch applies the needed changes to LLVM; the companion clang patch is https://reviews.llvm.org/D25004.

Differential Revision: https://reviews.llvm.org/D25005
llvm-svn: 283717

6 files changed, +618 −0 lines
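
For illustration, here is a minimal LLVM IR sketch of the constraint in use (not part of the commit; the function and value names are invented). Under -mattr=+avx the "v" operands may be placed in any of xmm0-15, exactly as with the existing "x" constraint; on AVX512 targets the extended registers become available as well:

define <4 x float> @v_constraint_demo(<4 x float> %a, <4 x float> %b) {
entry:
  ; "v" leaves the register choice to the allocator, like "x", but on
  ; AVX512 targets it also admits the extended SIMD registers.
  %r = tail call <4 x float> asm "vaddps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}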
 

llvm/lib/Target/X86/X86ISelLowering.cpp (+15)

@@ -32024,6 +32024,7 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
     case 'u':
     case 'y':
     case 'x':
+    case 'v':
     case 'Y':
     case 'l':
       return C_RegisterClass;
@@ -32093,6 +32094,10 @@ TargetLowering::ConstraintWeight
     if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
+  case 'v':
+    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
+      weight = CW_Register;
+    LLVM_FALLTHROUGH;
   case 'x':
   case 'Y':
     if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
@@ -32429,17 +32434,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     case 'Y': // SSE_REGS if SSE2 allowed
       if (!Subtarget.hasSSE2()) break;
       LLVM_FALLTHROUGH;
+    case 'v':
     case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
       if (!Subtarget.hasSSE1()) break;
+      bool VConstraint = (Constraint[0] == 'v');

       switch (VT.SimpleTy) {
       default: break;
       // Scalar SSE types.
       case MVT::f32:
       case MVT::i32:
+        if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::FR32XRegClass);
         return std::make_pair(0U, &X86::FR32RegClass);
       case MVT::f64:
       case MVT::i64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::FR64XRegClass);
         return std::make_pair(0U, &X86::FR64RegClass);
       // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
       // Vector types.
@@ -32449,6 +32460,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       case MVT::v2i64:
       case MVT::v4f32:
       case MVT::v2f64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::VR128XRegClass);
         return std::make_pair(0U, &X86::VR128RegClass);
       // AVX types.
       case MVT::v32i8:
@@ -32457,6 +32470,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       case MVT::v4i64:
       case MVT::v8f32:
       case MVT::v4f64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::VR256XRegClass);
         return std::make_pair(0U, &X86::VR256RegClass);
       case MVT::v8f64:
       case MVT::v16f32:
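
To see what the new VR128X/VR256X paths buy, here is a hedged sketch (not from the commit; the function name is invented) that is expected to allocate only when built with -mattr=+avx512vl: all of xmm0-15 are clobbered, so an "x"-constrained operand would have nowhere to go, while a "v"-constrained one can still land in xmm16-31 via VR128X:

define <4 x float> @v_can_use_high_xmm(<4 x float> %a) {
entry:
  ; With xmm0-xmm15 all clobbered, the "v" operands are expected to be
  ; assigned from xmm16-xmm31 (VR128X), which plain "x" never offers.
  %r = tail call <4 x float> asm "vmovaps $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(<4 x float> %a)
  ret <4 x float> %r
}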
llvm/test/CodeGen/X86/… (new file, +136)

@@ -0,0 +1,136 @@
+; RUN: not llc < %s -mtriple i386-unknown-linux-gnu -mattr +avx 1> /dev/null 2> %t
+; RUN: FileCheck %s --input-file %t
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_4(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, i32 %_l)
+  ret <4 x float> %0
+}
+
+define i32 @testXMM_6(i32 returned %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  tail call void asm sideeffect "vmovd $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret i32 %_l
+}
+
+define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  tail call void asm sideeffect "vmovmskps $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0)
+  ret <4 x float> %_xmm0
+}
+
+define i32 @testXMM_8(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call i32 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret i32 %0
+}
+
+define <4 x float> @testXMM_9(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_10(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_11(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
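
Every function above expects an error because in 32-bit mode only xmm0-7/ymm0-7 exist, and each test clobbers all eight, leaving the "v" operands nothing to allocate. As a hedged counter-sketch (not part of the commit), leaving two registers unclobbered should let the same pattern compile on the same i386/+avx command line:

define <4 x float> @leaves_xmm6_xmm7(<4 x float> %_xmm0) {
entry:
  ; Only xmm0-xmm5 are clobbered, so xmm6/xmm7 remain for the "v" operands.
  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0)
  ret <4 x float> %0
}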
llvm/test/CodeGen/X86/… (new file, +136)

@@ -0,0 +1,136 @@
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx | FileCheck %s
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512f | FileCheck %s
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vmovhlps %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: movapd %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vmovapd %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_4(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vmpsadbw $0, %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vminpd %xmm0, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l)
+  ret <4 x float> %0
+}
+
+define i64 @testXMM_6(i64 returned %_l) {
+; CHECK: vmovd %xmm0, %eax
+entry:
+  tail call void asm sideeffect "vmovd $0, %eax", "v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret i64 %_l
+}
+
+define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) {
+; CHECK: vmovmskps %xmm0, %eax
+entry:
+  tail call void asm sideeffect "vmovmskps $0, %rax", "v,~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0)
+  ret <4 x float> %_xmm0
+}
+
+define i64 @testXMM_8(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vmulsd %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret i64 %0
+}
+
+define <4 x float> @testXMM_9(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vorpd %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_10(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: pabsb %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_11(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: vpabsd %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmovsldup %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmovapd %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vminpd %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vorpd %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmulps %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmulpd %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmovups %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: vmovupd %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
