Skip to content

Commit 24e6840

Browse files
author
Erich Keane
committedFeb 2, 2018
[CodeGen][va_args] Correct Vector Struct va-arg 'in_reg' code gen
When trying to track down a different bug, we discovered that calling __builtin_va_arg on a vec3f type caused the SROA pass to issue a warning that there was an illegal access. Further research showed that the vec3f type is alloca'ed as size '12', but the _builtin_va_arg code on x86_64 was always loading this out of registers as {double, double}. Thus, the 2nd store into the vec3f was storing in bytes 12-15! This patch alters the original implementation which always assumed {double, double} to use the actual coerced type instead, so the LLVM-IR generated is a load/GEP/store of a <2 x float> and a float, rather than a double and a double. Tests were added for all combinations I could think of that would fit in 2 FP registers, and all work exactly as expected. Differential Revision: https://reviews.llvm.org/D42811 llvm-svn: 324098
1 parent b4ab4b6 commit 24e6840

File tree

2 files changed

+138
-6
lines changed

2 files changed

+138
-6
lines changed
 

‎clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3790,17 +3790,18 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
37903790
Address RegAddrHi =
37913791
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
37923792
CharUnits::fromQuantity(16));
3793-
llvm::Type *DoubleTy = CGF.DoubleTy;
3794-
llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
3793+
llvm::Type *ST = AI.canHaveCoerceToType()
3794+
? AI.getCoerceToType()
3795+
: llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
37953796
llvm::Value *V;
37963797
Address Tmp = CGF.CreateMemTemp(Ty);
37973798
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
3798-
V = CGF.Builder.CreateLoad(
3799-
CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy));
3799+
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
3800+
RegAddrLo, ST->getStructElementType(0)));
38003801
CGF.Builder.CreateStore(V,
38013802
CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
3802-
V = CGF.Builder.CreateLoad(
3803-
CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy));
3803+
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
3804+
RegAddrHi, ST->getStructElementType(1)));
38043805
CGF.Builder.CreateStore(V,
38053806
CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8)));
38063807

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
2+
// RUN: FileCheck %s
3+
4+
// This test validates that the inreg branch generation for __builtin_va_arg
5+
// does not exceed the alloca size of the type, which can cause the SROA pass to
6+
// eliminate the assignment.
7+
8+
typedef struct { float x, y, z; } vec3f;
9+
10+
double Vec3FTest(__builtin_va_list ap) {
11+
vec3f vec = __builtin_va_arg(ap, vec3f);
12+
return vec.x + vec.y + vec.z;
13+
}
14+
// CHECK: define double @Vec3FTest
15+
// CHECK: vaarg.in_reg:
16+
// CHECK: [[Vec3FLoad1:%.*]] = load <2 x float>, <2 x float>*
17+
// CHECK: [[Vec3FGEP1:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 0
18+
// CHECK: store <2 x float> [[Vec3FLoad1]], <2 x float>* [[Vec3FGEP1]]
19+
// CHECK: [[Vec3FLoad2:%.*]] = load float, float*
20+
// CHECK: [[Vec3FGEP2:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* {{%.*}}, i32 0, i32 1
21+
// CHECK: store float [[Vec3FLoad2]], float* [[Vec3FGEP2]]
22+
// CHECK: vaarg.in_mem:
23+
24+
25+
typedef struct { float x, y, z, q; } vec4f;
26+
27+
double Vec4FTest(__builtin_va_list ap) {
28+
vec4f vec = __builtin_va_arg(ap, vec4f);
29+
return vec.x + vec.y + vec.z + vec.q;
30+
}
31+
// CHECK: define double @Vec4FTest
32+
// CHECK: vaarg.in_reg:
33+
// CHECK: [[Vec4FLoad1:%.*]] = load <2 x float>, <2 x float>*
34+
// CHECK: [[Vec4FGEP1:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 0
35+
// CHECK: store <2 x float> [[Vec4FLoad1]], <2 x float>* [[Vec4FGEP1]]
36+
// CHECK: [[Vec4FLoad2:%.*]] = load <2 x float>, <2 x float>*
37+
// CHECK: [[Vec4FGEP2:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* {{%.*}}, i32 0, i32 1
38+
// CHECK: store <2 x float> [[Vec4FLoad2]], <2 x float>* [[Vec4FGEP2]]
39+
// CHECK: vaarg.in_mem:
40+
41+
typedef struct { double x, y; } vec2d;
42+
43+
double Vec2DTest(__builtin_va_list ap) {
44+
vec2d vec = __builtin_va_arg(ap, vec2d);
45+
return vec.x + vec.y;
46+
}
47+
// CHECK: define double @Vec2DTest
48+
// CHECK: vaarg.in_reg:
49+
// CHECK: [[Vec2DLoad1:%.*]] = load double, double*
50+
// CHECK: [[Vec2DGEP1:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 0
51+
// CHECK: store double [[Vec2DLoad1]], double* [[Vec2DGEP1]]
52+
// CHECK: [[Vec2DLoad2:%.*]] = load double, double*
53+
// CHECK: [[Vec2DGEP2:%.*]] = getelementptr inbounds { double, double }, { double, double }* {{%.*}}, i32 0, i32 1
54+
// CHECK: store double [[Vec2DLoad2]], double* [[Vec2DGEP2]]
55+
// CHECK: vaarg.in_mem:
56+
57+
typedef struct {
58+
float x, y;
59+
double z;
60+
} vec2f1d;
61+
62+
double Vec2F1DTest(__builtin_va_list ap) {
63+
vec2f1d vec = __builtin_va_arg(ap, vec2f1d);
64+
return vec.x + vec.y + vec.z;
65+
}
66+
// CHECK: define double @Vec2F1DTest
67+
// CHECK: vaarg.in_reg:
68+
// CHECK: [[Vec2F1DLoad1:%.*]] = load <2 x float>, <2 x float>*
69+
// CHECK: [[Vec2F1DGEP1:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 0
70+
// CHECK: store <2 x float> [[Vec2F1DLoad1]], <2 x float>* [[Vec2F1DGEP1]]
71+
// CHECK: [[Vec2F1DLoad2:%.*]] = load double, double*
72+
// CHECK: [[Vec2F1DGEP2:%.*]] = getelementptr inbounds { <2 x float>, double }, { <2 x float>, double }* {{%.*}}, i32 0, i32 1
73+
// CHECK: store double [[Vec2F1DLoad2]], double* [[Vec2F1DGEP2]]
74+
// CHECK: vaarg.in_mem:
75+
76+
typedef struct {
77+
double x;
78+
float y, z;
79+
} vec1d2f;
80+
81+
double Vec1D2FTest(__builtin_va_list ap) {
82+
vec1d2f vec = __builtin_va_arg(ap, vec1d2f);
83+
return vec.x + vec.y + vec.z;
84+
}
85+
// CHECK: define double @Vec1D2FTest
86+
// CHECK: vaarg.in_reg:
87+
// CHECK: [[Vec1D2FLoad1:%.*]] = load double, double*
88+
// CHECK: [[Vec1D2FGEP1:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 0
89+
// CHECK: store double [[Vec1D2FLoad1]], double* [[Vec1D2FGEP1]]
90+
// CHECK: [[Vec1D2FLoad2:%.*]] = load <2 x float>, <2 x float>*
91+
// CHECK: [[Vec1D2FGEP2:%.*]] = getelementptr inbounds { double, <2 x float> }, { double, <2 x float> }* {{%.*}}, i32 0, i32 1
92+
// CHECK: store <2 x float> [[Vec1D2FLoad2]], <2 x float>* [[Vec1D2FGEP2]]
93+
// CHECK: vaarg.in_mem:
94+
95+
typedef struct {
96+
float x;
97+
double z;
98+
} vec1f1d;
99+
100+
double Vec1F1DTest(__builtin_va_list ap) {
101+
vec1f1d vec = __builtin_va_arg(ap, vec1f1d);
102+
return vec.x + vec.z;
103+
}
104+
// CHECK: define double @Vec1F1DTest
105+
// CHECK: vaarg.in_reg:
106+
// CHECK: [[Vec1F1DLoad1:%.*]] = load float, float*
107+
// CHECK: [[Vec1F1DGEP1:%.*]] = getelementptr inbounds { float, double }, { float, double }* {{%.*}}, i32 0, i32 0
108+
// CHECK: store float [[Vec1F1DLoad1]], float* [[Vec1F1DGEP1]]
109+
// CHECK: [[Vec1F1DLoad2:%.*]] = load double, double*
110+
// CHECK: [[Vec1F1DGEP2:%.*]] = getelementptr inbounds { float, double }, { float, double }* {{%.*}}, i32 0, i32 1
111+
// CHECK: store double [[Vec1F1DLoad2]], double* [[Vec1F1DGEP2]]
112+
// CHECK: vaarg.in_mem:
113+
114+
typedef struct {
115+
double x;
116+
float z;
117+
} vec1d1f;
118+
119+
double Vec1D1FTest(__builtin_va_list ap) {
120+
vec1d1f vec = __builtin_va_arg(ap, vec1d1f);
121+
return vec.x + vec.z;
122+
}
123+
// CHECK: define double @Vec1D1FTest
124+
// CHECK: vaarg.in_reg:
125+
// CHECK: [[Vec1D1FLoad1:%.*]] = load double, double*
126+
// CHECK: [[Vec1D1FGEP1:%.*]] = getelementptr inbounds { double, float }, { double, float }* {{%.*}}, i32 0, i32 0
127+
// CHECK: store double [[Vec1D1FLoad1]], double* [[Vec1D1FGEP1]]
128+
// CHECK: [[Vec1D1FLoad2:%.*]] = load float, float*
129+
// CHECK: [[Vec1D1FGEP2:%.*]] = getelementptr inbounds { double, float }, { double, float }* {{%.*}}, i32 0, i32 1
130+
// CHECK: store float [[Vec1D1FLoad2]], float* [[Vec1D1FGEP2]]
131+
// CHECK: vaarg.in_mem:

0 commit comments

Comments
 (0)
Please sign in to comment.