Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp
@@ -6419,6 +6419,36 @@
     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
   }
 
+  // These exist so that the builtin that takes an immediate can be bounds
+  // checked by clang to avoid passing bad immediates to the backend. Since
+  // AVX has a larger immediate than SSE we would need separate builtins to
+  // do the different bounds checking. Rather than create a clang-specific
+  // SSE-only builtin, this implements eight separate builtins to match the
+  // gcc implementation.
+  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
+    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops);
+  };
+
+  // For the vector forms of FP comparisons, translate the builtins directly to
+  // IR.
+  // TODO: The builtins could be removed if the SSE header files used vector
+  // extension comparisons directly (vector ordered/unordered may need
+  // additional support via __builtin_isnan()).
+  llvm::VectorType *V2F64 =
+      llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2);
+  llvm::VectorType *V4F32 =
+      llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4);
+
+  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
+                                      llvm::VectorType *FPVecTy) {
+    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+    return Builder.CreateBitCast(Sext, FPVecTy);
+  };
+
   switch (BuiltinID) {
   default: return nullptr;
   case X86::BI__builtin_cpu_supports: {
@@ -6857,154 +6887,74 @@
       Ops[0]);
     return Builder.CreateExtractValue(Call, 1);
   }
-  // SSE comparison intrisics
+
+  // SSE packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
+    return getVectorFCmpIR(CmpInst::FCMP_OEQ, V4F32);
   case X86::BI__builtin_ia32_cmpltps:
+    return getVectorFCmpIR(CmpInst::FCMP_OLT, V4F32);
   case X86::BI__builtin_ia32_cmpleps:
+    return getVectorFCmpIR(CmpInst::FCMP_OLE, V4F32);
   case X86::BI__builtin_ia32_cmpunordps:
+    return getVectorFCmpIR(CmpInst::FCMP_UNO, V4F32);
   case X86::BI__builtin_ia32_cmpneqps:
+    return getVectorFCmpIR(CmpInst::FCMP_UNE, V4F32);
   case X86::BI__builtin_ia32_cmpnltps:
+    return getVectorFCmpIR(CmpInst::FCMP_UGE, V4F32);
   case X86::BI__builtin_ia32_cmpnleps:
+    return getVectorFCmpIR(CmpInst::FCMP_UGT, V4F32);
   case X86::BI__builtin_ia32_cmpordps:
-  case X86::BI__builtin_ia32_cmpeqss:
-  case X86::BI__builtin_ia32_cmpltss:
-  case X86::BI__builtin_ia32_cmpless:
-  case X86::BI__builtin_ia32_cmpunordss:
-  case X86::BI__builtin_ia32_cmpneqss:
-  case X86::BI__builtin_ia32_cmpnltss:
-  case X86::BI__builtin_ia32_cmpnless:
-  case X86::BI__builtin_ia32_cmpordss:
+    return getVectorFCmpIR(CmpInst::FCMP_ORD, V4F32);
   case X86::BI__builtin_ia32_cmpeqpd:
+    return getVectorFCmpIR(CmpInst::FCMP_OEQ, V2F64);
   case X86::BI__builtin_ia32_cmpltpd:
+    return getVectorFCmpIR(CmpInst::FCMP_OLT, V2F64);
   case X86::BI__builtin_ia32_cmplepd:
+    return getVectorFCmpIR(CmpInst::FCMP_OLE, V2F64);
   case X86::BI__builtin_ia32_cmpunordpd:
+    return getVectorFCmpIR(CmpInst::FCMP_UNO, V2F64);
   case X86::BI__builtin_ia32_cmpneqpd:
+    return getVectorFCmpIR(CmpInst::FCMP_UNE, V2F64);
   case X86::BI__builtin_ia32_cmpnltpd:
+    return getVectorFCmpIR(CmpInst::FCMP_UGE, V2F64);
   case X86::BI__builtin_ia32_cmpnlepd:
+    return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
   case X86::BI__builtin_ia32_cmpordpd:
+    return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+
+  // SSE scalar comparison intrinsics
+  case X86::BI__builtin_ia32_cmpeqss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
+  case X86::BI__builtin_ia32_cmpltss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
+  case X86::BI__builtin_ia32_cmpless:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
+  case X86::BI__builtin_ia32_cmpunordss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
+  case X86::BI__builtin_ia32_cmpneqss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
+  case X86::BI__builtin_ia32_cmpnltss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
+  case X86::BI__builtin_ia32_cmpnless:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
+  case X86::BI__builtin_ia32_cmpordss:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
   case X86::BI__builtin_ia32_cmpeqsd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
   case X86::BI__builtin_ia32_cmpltsd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
   case X86::BI__builtin_ia32_cmplesd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
   case X86::BI__builtin_ia32_cmpunordsd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
   case X86::BI__builtin_ia32_cmpneqsd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
   case X86::BI__builtin_ia32_cmpnltsd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
   case X86::BI__builtin_ia32_cmpnlesd:
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
   case X86::BI__builtin_ia32_cmpordsd:
-    // These exist so that the builtin that takes an immediate can be bounds
-    // checked by clang to avoid passing bad immediates to the backend. Since
-    // AVX has a larger immediate than SSE we would need separate builtins to
-    // do the different bounds checking. Rather than create a clang specific
-    // SSE only builtin, this implements eight separate builtins to match gcc
-    // implementation.
-
-    // Choose the immediate.
-    unsigned Imm;
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_cmpeqps:
-    case X86::BI__builtin_ia32_cmpeqss:
-    case X86::BI__builtin_ia32_cmpeqpd:
-    case X86::BI__builtin_ia32_cmpeqsd:
-      Imm = 0;
-      break;
-    case X86::BI__builtin_ia32_cmpltps:
-    case X86::BI__builtin_ia32_cmpltss:
-    case X86::BI__builtin_ia32_cmpltpd:
-    case X86::BI__builtin_ia32_cmpltsd:
-      Imm = 1;
-      break;
-    case X86::BI__builtin_ia32_cmpleps:
-    case X86::BI__builtin_ia32_cmpless:
-    case X86::BI__builtin_ia32_cmplepd:
-    case X86::BI__builtin_ia32_cmplesd:
-      Imm = 2;
-      break;
-    case X86::BI__builtin_ia32_cmpunordps:
-    case X86::BI__builtin_ia32_cmpunordss:
-    case X86::BI__builtin_ia32_cmpunordpd:
-    case X86::BI__builtin_ia32_cmpunordsd:
-      Imm = 3;
-      break;
-    case X86::BI__builtin_ia32_cmpneqps:
-    case X86::BI__builtin_ia32_cmpneqss:
-    case X86::BI__builtin_ia32_cmpneqpd:
-    case X86::BI__builtin_ia32_cmpneqsd:
-      Imm = 4;
-      break;
-    case X86::BI__builtin_ia32_cmpnltps:
-    case X86::BI__builtin_ia32_cmpnltss:
-    case X86::BI__builtin_ia32_cmpnltpd:
-    case X86::BI__builtin_ia32_cmpnltsd:
-      Imm = 5;
-      break;
-    case X86::BI__builtin_ia32_cmpnleps:
-    case X86::BI__builtin_ia32_cmpnless:
-    case X86::BI__builtin_ia32_cmpnlepd:
-    case X86::BI__builtin_ia32_cmpnlesd:
-      Imm = 6;
-      break;
-    case X86::BI__builtin_ia32_cmpordps:
-    case X86::BI__builtin_ia32_cmpordss:
-    case X86::BI__builtin_ia32_cmpordpd:
-    case X86::BI__builtin_ia32_cmpordsd:
-      Imm = 7;
-      break;
-    }
-
-    // Choose the intrinsic ID.
-    const char *name;
-    Intrinsic::ID ID;
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_cmpeqps:
-    case X86::BI__builtin_ia32_cmpltps:
-    case X86::BI__builtin_ia32_cmpleps:
-    case X86::BI__builtin_ia32_cmpunordps:
-    case X86::BI__builtin_ia32_cmpneqps:
-    case X86::BI__builtin_ia32_cmpnltps:
-    case X86::BI__builtin_ia32_cmpnleps:
-    case X86::BI__builtin_ia32_cmpordps:
-      name = "cmpps";
-      ID = Intrinsic::x86_sse_cmp_ps;
-      break;
-    case X86::BI__builtin_ia32_cmpeqss:
-    case X86::BI__builtin_ia32_cmpltss:
-    case X86::BI__builtin_ia32_cmpless:
-    case X86::BI__builtin_ia32_cmpunordss:
-    case X86::BI__builtin_ia32_cmpneqss:
-    case X86::BI__builtin_ia32_cmpnltss:
-    case X86::BI__builtin_ia32_cmpnless:
-    case X86::BI__builtin_ia32_cmpordss:
-      name = "cmpss";
-      ID = Intrinsic::x86_sse_cmp_ss;
-      break;
-    case X86::BI__builtin_ia32_cmpeqpd:
-    case X86::BI__builtin_ia32_cmpltpd:
-    case X86::BI__builtin_ia32_cmplepd:
-    case X86::BI__builtin_ia32_cmpunordpd:
-    case X86::BI__builtin_ia32_cmpneqpd:
-    case X86::BI__builtin_ia32_cmpnltpd:
-    case X86::BI__builtin_ia32_cmpnlepd:
-    case X86::BI__builtin_ia32_cmpordpd:
-      name = "cmppd";
-      ID = Intrinsic::x86_sse2_cmp_pd;
-      break;
-    case X86::BI__builtin_ia32_cmpeqsd:
-    case X86::BI__builtin_ia32_cmpltsd:
-    case X86::BI__builtin_ia32_cmplesd:
-    case X86::BI__builtin_ia32_cmpunordsd:
-    case X86::BI__builtin_ia32_cmpneqsd:
-    case X86::BI__builtin_ia32_cmpnltsd:
-    case X86::BI__builtin_ia32_cmpnlesd:
-    case X86::BI__builtin_ia32_cmpordsd:
-      name = "cmpsd";
-      ID = Intrinsic::x86_sse2_cmp_sd;
-      break;
-    }
-
-    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, name);
+    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
   }
 }
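For reference, a minimal sketch (not part of the patch; the function name is hypothetical) of what the new packed lowering produces, with the IR that getVectorFCmpIR emits shown in comments. Note the unordered predicate: CMPNLTPS must return true when either input is NaN, so "not less-than" becomes fcmp uge rather than oge:

#include <xmmintrin.h>

// Hypothetical example: _mm_cmpnlt_ps maps to __builtin_ia32_cmpnltps,
// which the new code lowers roughly as:
//   %cmp  = fcmp uge <4 x float> %a, %b       ; <4 x i1>, true on NaN lanes
//   %sext = sext <4 x i1> %cmp to <4 x i32>   ; 0 -> 0x0, 1 -> 0xFFFFFFFF
//   %res  = bitcast <4 x i32> %sext to <4 x float>
__m128 cmpnlt_sketch(__m128 a, __m128 b) {
  return _mm_cmpnlt_ps(a, b);
}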
Index: cfe/trunk/test/CodeGen/avx2-builtins.c
===================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c
+++ cfe/trunk/test/CodeGen/avx2-builtins.c
@@ -473,7 +473,9 @@
 
 __m128d test_mm_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
   // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i32gather_pd(b, c, 2);
 }
@@ -499,7 +501,9 @@
 
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_ps
-  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i32gather_ps(b, c, 2);
 }
@@ -573,7 +577,9 @@
 
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
   // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i64gather_pd(b, c, 2);
 }
@@ -599,7 +605,9 @@
 
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_ps
-  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i64gather_ps(b, c, 2);
 }
@@ -612,7 +620,9 @@
 
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_ps
-  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
   // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm256_i64gather_ps(b, c, 2);
 }
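A brief aside on why the gather tests change at all: the gather helpers in the headers presumably build their all-ones mask with a packed ordered-equal self-compare (e.g. _mm_cmpeq_pd on zero), so the mask construction now appears as fcmp oeq + sext + bitcast instead of a call to @llvm.x86.sse2.cmp.pd. A hypothetical sketch of that pattern:

#include <emmintrin.h>

// Hypothetical: an all-bits-set mask via an ordered-equal self-compare.
// 0.0 == 0.0 holds on every lane, so the sext yields all-ones per element.
__m128d all_ones_mask_sketch(void) {
  __m128d zero = _mm_setzero_pd();
  return _mm_cmpeq_pd(zero, zero);
}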
Index: cfe/trunk/test/CodeGen/sse-builtins.c
===================================================================
--- cfe/trunk/test/CodeGen/sse-builtins.c
+++ cfe/trunk/test/CodeGen/sse-builtins.c
@@ -37,7 +37,10 @@
 
 __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpeq_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpeq_ps(__a, __b);
 }
 
@@ -49,7 +52,10 @@
 
 __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpge_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpge_ps(__a, __b);
 }
 
@@ -62,7 +68,10 @@
 
 __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpgt_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpgt_ps(__a, __b);
 }
 
@@ -75,7 +84,10 @@
 
 __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmple_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmple_ps(__a, __b);
 }
 
@@ -87,7 +99,10 @@
 
 __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmplt_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmplt_ps(__a, __b);
 }
 
@@ -99,7 +114,10 @@
 
 __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpneq_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpneq_ps(__a, __b);
 }
 
@@ -111,7 +129,10 @@
 
 __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpnge_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpnge_ps(__a, __b);
 }
 
@@ -124,7 +145,10 @@
 
 __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpngt_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpngt_ps(__a, __b);
 }
 
@@ -137,7 +161,10 @@
 
 __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpnle_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpnle_ps(__a, __b);
 }
 
@@ -149,7 +176,10 @@
 
 __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpnlt_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpnlt_ps(__a, __b);
 }
 
@@ -161,7 +191,10 @@
 
 __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpord_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpord_ps(__a, __b);
 }
 
@@ -173,7 +206,10 @@
 
 __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpunord_ps
-  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT: ret <4 x float> [[BC]]
   return _mm_cmpunord_ps(__a, __b);
 }
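One detail worth calling out in the checks above: the ge/gt tests expect ole/olt (and nge/ngt expect ugt/uge). Presumably the headers implement the greater-than family by swapping operands into the less-than builtins, so only the lt/le predicates and their unordered negations ever reach CGBuiltin. A hypothetical sketch:

#include <xmmintrin.h>

// Hypothetical: per the checks, _mm_cmpge_ps(a, b) behaves like
// _mm_cmple_ps(b, a), so the emitted IR is fcmp ole with operands swapped.
__m128 cmpge_sketch(__m128 a, __m128 b) {
  return _mm_cmpge_ps(a, b);
}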
Index: cfe/trunk/test/CodeGen/sse2-builtins.c
===================================================================
--- cfe/trunk/test/CodeGen/sse2-builtins.c
+++ cfe/trunk/test/CodeGen/sse2-builtins.c
@@ -183,7 +183,10 @@
 
 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpeq_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpeq_pd(A, B);
 }
 
@@ -195,7 +198,10 @@
 
 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpge_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpge_pd(A, B);
 }
 
@@ -229,7 +235,10 @@
 
 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpgt_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpgt_pd(A, B);
 }
 
@@ -245,7 +254,10 @@
 
 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmple_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmple_pd(A, B);
 }
 
@@ -275,7 +287,10 @@
 
 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmplt_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmplt_pd(A, B);
 }
 
@@ -287,7 +302,10 @@
 
 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpneq_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+  // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpneq_pd(A, B);
 }
 
@@ -299,7 +317,10 @@
 
 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpnge_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpnge_pd(A, B);
 }
 
@@ -315,7 +336,10 @@
 
 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpngt_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpngt_pd(A, B);
 }
 
@@ -331,7 +355,10 @@
 
 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpnle_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpnle_pd(A, B);
 }
 
@@ -343,7 +370,10 @@
 
 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpnlt_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpnlt_pd(A, B);
 }
 
@@ -355,7 +385,10 @@
 
 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpord_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+  // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpord_pd(A, B);
 }
 
@@ -367,7 +400,10 @@
 
 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_cmpunord_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+  // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
+  // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT: ret <2 x double> [[BC]]
   return _mm_cmpunord_pd(A, B);
 }
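Finally, note what the patch does not change: the scalar forms still lower to the target intrinsics, with the immediate now attached in CGBuiltin by getCmpIntrinsicCall, since a scalar compare writes only lane 0 and passes the upper lanes of the first operand through, which a plain vector fcmp does not express. A hypothetical illustration:

#include <xmmintrin.h>

// Hypothetical: _mm_cmpeq_ss keeps lowering to the intrinsic, now with the
// immediate appended by clang:
//   call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
__m128 cmpeq_ss_sketch(__m128 a, __m128 b) {
  return _mm_cmpeq_ss(a, b); // lane 0 = compare mask, lanes 1-3 = a's lanes
}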