
Commit a57d64e

Committed Feb 10, 2018
[X86] Change the signature of the AVX512 packed fp compare intrinsics to return vXi1 mask. Make bitcasts to scalar explicit in IR
Summary: This is the clang equivalent of r324827

Reviewers: zvi, delena, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43143

llvm-svn: 324828
1 parent 4dccffc commit a57d64e
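In practice (an editorial sketch, not part of the commit): the user-level semantics of the intrinsics are unchanged; only the IR clang emits for them changes, as captured by the updated tests below. A minimal usage example, assuming an AVX-512F target; the wrapper function name is illustrative:

// Illustrative only. Compile with -mavx512f; behavior is unchanged by this
// commit, only the emitted IR shape differs.
#include <immintrin.h>

__mmask16 cmp_eq_mask(__m512 a, __m512 b) {
  // Per the updated CHECK lines in avx512f-builtins.c, clang now emits
  //   %cmp = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(...)
  // followed by an explicit bitcast of the <16 x i1> result to i16.
  return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
}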

File tree: 3 files changed (+88 −33 lines)


clang/lib/CodeGen/CGBuiltin.cpp

+64 −17
@@ -8060,6 +8060,29 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
 }

+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+                                         unsigned NumElts, Value *MaskIn) {
+  if (MaskIn) {
+    const auto *C = dyn_cast<Constant>(MaskIn);
+    if (!C || !C->isAllOnesValue())
+      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+  }
+
+  if (NumElts < 8) {
+    uint32_t Indices[8];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+    for (unsigned i = NumElts; i != 8; ++i)
+      Indices[i] = i % NumElts + NumElts;
+    Cmp = CGF.Builder.CreateShuffleVector(
+        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+  }
+
+  return CGF.Builder.CreateBitCast(Cmp,
+                                   IntegerType::get(CGF.getLLVMContext(),
+                                                    std::max(NumElts, 8U)));
+}
+
 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                    bool Signed, ArrayRef<Value *> Ops) {
   assert((Ops.size() == 2 || Ops.size() == 4) &&
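For intuition, here is a small standalone sketch (editorial, not part of the commit) of the index computation in EmitX86MaskedCompareResult above: when the compare yields fewer than 8 bits, the <N x i1> result is shuffled together with an all-zeros vector so the padding lanes are zero before the whole value is bitcast to i8.

// Standalone illustration of the shuffle indices built above. Lanes
// [0, NumElts) keep the compare bits (first shuffle operand); lanes
// [NumElts, 8) pick elements from the all-zeros second operand, so the
// high bits of the resulting i8 mask are guaranteed to be zero.
#include <cstdio>

int main() {
  const unsigned Cases[] = {2, 4};
  for (unsigned NumElts : Cases) {
    unsigned Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;                     // element i of the compare result
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts; // indices >= NumElts hit operand 2
    std::printf("NumElts=%u:", NumElts);
    for (unsigned Idx : Indices)
      std::printf(" %u", Idx);
    std::printf("\n");                    // e.g. NumElts=2: 0 1 2 3 2 3 2 3
  }
  return 0;
}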
@@ -8087,24 +8110,11 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
   }

-  if (Ops.size() == 4) {
-    const auto *C = dyn_cast<Constant>(Ops[3]);
-    if (!C || !C->isAllOnesValue())
-      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts));
-  }
+  Value *MaskIn = nullptr;
+  if (Ops.size() == 4)
+    MaskIn = Ops[3];

-  if (NumElts < 8) {
-    uint32_t Indices[8];
-    for (unsigned i = 0; i != NumElts; ++i)
-      Indices[i] = i;
-    for (unsigned i = NumElts; i != 8; ++i)
-      Indices[i] = i % NumElts + NumElts;
-    Cmp = CGF.Builder.CreateShuffleVector(
-        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
-  }
-  return CGF.Builder.CreateBitCast(Cmp,
-                                   IntegerType::get(CGF.getLLVMContext(),
-                                                    std::max(NumElts, 8U)));
+  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
 }

 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
@@ -8882,6 +8892,43 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractValue(Call, 1);
   }

+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+    Value *MaskIn = Ops[3];
+    Ops.erase(&Ops[3]);
+
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpps128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+      break;
+    case X86::BI__builtin_ia32_cmpps256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+      break;
+    case X86::BI__builtin_ia32_cmpps512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+      break;
+    case X86::BI__builtin_ia32_cmppd128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+      break;
+    case X86::BI__builtin_ia32_cmppd256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+      break;
+    case X86::BI__builtin_ia32_cmppd512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+      break;
+    }
+
+    Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+  }
+
   // SSE packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
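For the masked builtins handled above, the mask operand is peeled off before the intrinsic call and folded back in afterwards: EmitX86MaskedCompareResult ANDs it with the <N x i1> compare result, widens to <8 x i1> when N < 8, and bitcasts to the scalar mask type. A hedged usage sketch, assuming an AVX-512VL target; the wrapper name is illustrative:

// Illustrative only; mirrors test_mm_mask_cmp_pd_mask in the updated
// avx512vl-builtins.c test below.
#include <immintrin.h>

__mmask8 masked_cmp_eq(__mmask8 m, __m128d a, __m128d b) {
  // Clang now emits roughly:
  //   %cmp = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(...)
  //   %and = and <2 x i1> %cmp, %mask
  //   ...shufflevector to <8 x i1> with zero padding, then bitcast to i8
  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ);
}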

clang/test/CodeGen/avx512f-builtins.c

+12 −8
@@ -1001,49 +1001,53 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)

 __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }

 __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }

 __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_ps_mask
-  // CHECKn: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_ps_mask(a, b, 0);
 }

 __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_ps_mask(m, a, b, 0);
 }

 __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }

 __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }

 __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_pd_mask(a, b, 0);
 }

 __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_pd_mask(m, a, b, 0);
 }

clang/test/CodeGen/avx512vl-builtins.c

+12 −8
@@ -1049,49 +1049,53 @@ __m128i test_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {

 __mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
   return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
 }

 __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_ps_mask(m, __A, __B, 0);
 }

 __mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
   return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
 }

 __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_ps_mask(m, __A, __B, 0);
 }

 __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
   return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
 }

 __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_pd_mask(m, __A, __B, 0);
 }

 __mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
   return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
 }

 __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_pd_mask(m, __A, __B, 0);
 }
