Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -7485,11 +7485,34 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy) { - unsigned NumberOfElements = DstTy->getVectorNumElements(); + unsigned NumberOfElements = DstTy->getVectorNumElements(); Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } +static Value *EmitX86Mask(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { + llvm::Type *Typ1 = Ops[0]->getType(); + Value *Cmp = CGF.Builder.CreateICmp(CmpInst::Predicate::ICMP_SLT, Ops[0], + llvm::ConstantVector::getNullValue(Typ1)); + Value *BitCast = CGF.Builder.CreateBitCast( + Cmp, llvm::Type::getIntNTy(CGF.Builder.getContext(), + Typ1->getVectorNumElements())); + return (Typ1->getVectorNumElements() < 32) + ? CGF.Builder.CreateZExt( + BitCast, llvm::Type::getInt32Ty(CGF.getLLVMContext())) + : BitCast; +} + +static Value *EmitX86MaskFloat(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { + Value *FloatOp = Ops[0]; + unsigned DstTypEle = FloatOp->getType()->getVectorNumElements(); + llvm::Type *IntTy = llvm::Type::getIntNTy( + CGF.Builder.getContext(), FloatOp->getType()->getScalarSizeInBits()); + llvm::Type *DstTyp = llvm::VectorType::get(IntTy, DstTypEle); + Value *BitCastFloatToInt = CGF.Builder.CreateBitCast(FloatOp, DstTyp); + return EmitX86Mask(CGF, BitCastFloatToInt); +} + Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); @@ -7860,6 +7883,14 @@ case X86::BI__builtin_ia32_cvtmask2q512: return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_pmovmskb128: + case X86::BI__builtin_ia32_pmovmskb256: + return EmitX86Mask(*this, Ops); + case X86::BI__builtin_ia32_movmskps256: + case X86::BI__builtin_ia32_movmskpd256: + case 
X86::BI__builtin_ia32_movmskps: + case X86::BI__builtin_ia32_movmskpd: + return EmitX86MaskFloat(*this, Ops); case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: Index: test/CodeGen/avx-builtins.c =================================================================== --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -611,13 +611,20 @@ int test_mm256_movemask_pd(__m256d A) { // CHECK-LABEL: test_mm256_movemask_pd - // CHECK: call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}}) + // CHECK: bitcast <4 x double> %1 to <4 x i64> + // CHECK: icmp slt <4 x i64> %2, zeroinitializer + // CHECK: bitcast <4 x i1> %3 to i4 + // CHECK: zext i4 %4 to i32 return _mm256_movemask_pd(A); } int test_mm256_movemask_ps(__m256 A) { // CHECK-LABEL: test_mm256_movemask_ps - // CHECK: call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}}) + // CHECK: load <8 x float>, <8 x float>* %__a.addr.i, align 32 + // CHECK: bitcast <8 x float> %1 to <8 x i32> + // CHECK: icmp slt <8 x i32> %2, zeroinitializer + // CHECK: bitcast <8 x i1> %3 to i8 + // CHECK: zext i8 %4 to i32 return _mm256_movemask_ps(A); } Index: test/CodeGen/avx2-builtins.c =================================================================== --- test/CodeGen/avx2-builtins.c +++ test/CodeGen/avx2-builtins.c @@ -823,7 +823,9 @@ int test_mm256_movemask_epi8(__m256i a) { // CHECK-LABEL: test_mm256_movemask_epi8 - // CHECK: call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) + // CHECK: bitcast <4 x i64> %1 to <32 x i8> + // CHECK: icmp slt <32 x i8> %2, zeroinitializer + // CHECK: bitcast <32 x i1> %3 to i32 return _mm256_movemask_epi8(a); } Index: test/CodeGen/sse-builtins.c =================================================================== --- test/CodeGen/sse-builtins.c +++ test/CodeGen/sse-builtins.c @@ -468,7 +468,10 @@ int test_mm_movemask_ps(__m128 A) { // CHECK-LABEL: test_mm_movemask_ps - // CHECK: call i32 
@llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}}) + // CHECK: bitcast <4 x float> %1 to <4 x i32> + // CHECK: icmp slt <4 x i32> %2, zeroinitializer + // CHECK: bitcast <4 x i1> %3 to i4 + // CHECK: zext i4 %4 to i32 return _mm_movemask_ps(A); } Index: test/CodeGen/sse2-builtins.c =================================================================== --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -804,13 +804,19 @@ int test_mm_movemask_epi8(__m128i A) { // CHECK-LABEL: test_mm_movemask_epi8 - // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) + // CHECK: bitcast <2 x i64> %1 to <16 x i8> + // CHECK: icmp slt <16 x i8> %2, zeroinitializer + // CHECK: bitcast <16 x i1> %3 to i16 + // CHECK: zext i16 %4 to i32 return _mm_movemask_epi8(A); } int test_mm_movemask_pd(__m128d A) { // CHECK-LABEL: test_mm_movemask_pd - // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) + // CHECK: bitcast <2 x double> %1 to <2 x i64> + // CHECK: icmp slt <2 x i64> %2, zeroinitializer + // CHECK: bitcast <2 x i1> %3 to i2 + // CHECK: zext i2 %4 to i32 return _mm_movemask_pd(A); }