Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -7282,7 +7282,10 @@ Op = Builder.CreateBitCast(Op, OTy); if (OTy->getScalarType()->isFloatingPointTy()) { - Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); + if (Fp == CmpInst::FCMP_OEQ) + Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); + else + Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy)); } else { Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); } @@ -10275,7 +10278,10 @@ Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqs_f32: @@ -10295,7 +10301,10 @@ Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqh_f16: @@ -10315,7 +10324,10 @@ Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqd_s64: Index: 
clang/test/CodeGen/aarch64-neon-intrinsics-constrained.c =================================================================== --- clang/test/CodeGen/aarch64-neon-intrinsics-constrained.c +++ clang/test/CodeGen/aarch64-neon-intrinsics-constrained.c @@ -1,27 +1,24 @@ // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \ // RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -mem2reg \ -// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s +// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED %s // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \ -// RUN: -ffp-exception-behavior=strict \ +// RUN: -ffp-exception-behavior=strict -fexperimental-strict-floating-point \ // RUN: -flax-vector-conversions=none -emit-llvm -o - %s | opt -S -mem2reg \ -// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s +// RUN: | FileCheck --check-prefixes=COMMON,COMMONIR,CONSTRAINED %s // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \ // RUN: -flax-vector-conversions=none -o - %s \ -// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: | FileCheck --check-prefixes=COMMON,CHECK-ASM,CHECK-ASM-UNCONSTRAINED %s // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \ -// RUN: -ffp-exception-behavior=strict \ +// RUN: -ffp-exception-behavior=strict -fexperimental-strict-floating-point \ // RUN: -flax-vector-conversions=none -o - %s \ -// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s +// RUN: | FileCheck --check-prefixes=COMMON,CHECK-ASM,CHECK-ASM-CONSTRAINED %s // REQUIRES: aarch64-registered-target -// 
Fails during instruction selection: -// XFAIL: * - // Test new aarch64 intrinsics and types but constrained #include @@ -278,7 +275,9 @@ // COMMON-LABEL: test_vceq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"oeq", metadata !"fpexcept.strict") -// CHECK-ASM: fcmeq v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-UNCONSTRAINED: fcmeq v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // COMMONIR: ret <2 x i32> [[SEXT_I]] uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { @@ -299,7 +298,11 @@ // COMMON-LABEL: test_vceqq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"oeq", metadata !"fpexcept.strict") -// CHECK-ASM: fcmeq v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-UNCONSTRAINED: fcmeq v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // COMMONIR: ret <4 x i32> [[SEXT_I]] uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { @@ -309,7 +312,9 @@ // COMMON-LABEL: test_vceqq_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"oeq", metadata !"fpexcept.strict") -// CHECK-ASM: fcmeq v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, 
v{{[0-9]+}}.2d +// CHECK-ASM-UNCONSTRAINED: fcmeq v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-CONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // COMMONIR: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { @@ -319,7 +324,9 @@ // COMMON-LABEL: test_vcge_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // COMMONIR: ret <2 x i32> [[SEXT_I]] uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { @@ -329,7 +336,8 @@ // COMMON-LABEL: test_vcge_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b // CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ge // COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // COMMONIR: ret <1 x i64> [[SEXT_I]] @@ -340,7 +348,11 @@ // COMMON-LABEL: test_vcgeq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge 
v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // COMMONIR: ret <4 x i32> [[SEXT_I]] uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { @@ -350,7 +362,9 @@ // COMMON-LABEL: test_vcgeq_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // COMMONIR: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { @@ -360,7 +374,9 @@ // COMMON-LABEL: test_vcle_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // COMMONIR: ret <2 x i32> [[SEXT_I]] uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { @@ -370,7 +386,8 @@ // COMMON-LABEL: 
test_vcle_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b // CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ls // COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // COMMONIR: ret <1 x i64> [[SEXT_I]] @@ -381,7 +398,11 @@ // COMMON-LABEL: test_vcleq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // COMMONIR: ret <4 x i32> [[SEXT_I]] uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { @@ -391,7 +412,9 @@ // COMMON-LABEL: test_vcleq_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmge v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-UNCONSTRAINED: fcmge v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 
x i64> // COMMONIR: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { @@ -401,7 +424,9 @@ // COMMON-LABEL: test_vcgt_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // COMMONIR: ret <2 x i32> [[SEXT_I]] uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { @@ -411,7 +436,8 @@ // COMMON-LABEL: test_vcgt_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b // CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, gt // COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // COMMONIR: ret <1 x i64> [[SEXT_I]] @@ -422,7 +448,11 @@ // COMMON-LABEL: test_vcgtq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe 
s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // COMMONIR: ret <4 x i32> [[SEXT_I]] uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { @@ -432,7 +462,9 @@ // COMMON-LABEL: test_vcgtq_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // COMMONIR: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { @@ -442,7 +474,9 @@ // COMMON-LABEL: test_vclt_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f32(<2 x float> %v1, <2 x float> %v2, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // COMMONIR: ret <2 x i32> [[SEXT_I]] uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { @@ -452,7 +486,8 @@ // COMMON-LABEL: test_vclt_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b // CONSTRAINED: [[CMP_I:%.*]] = call <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double> %a, <1 x double> %b, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, 
d{{[0-9]+}} +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, mi // COMMONIR: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> // COMMONIR: ret <1 x i64> [[SEXT_I]] @@ -463,7 +498,11 @@ // COMMON-LABEL: test_vcltq_f32 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %v1, <4 x float> %v2, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // COMMONIR: ret <4 x i32> [[SEXT_I]] uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { @@ -473,7 +512,9 @@ // COMMON-LABEL: test_vcltq_f64 // UNCONSTRAINED: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2 // CONSTRAINED: [[CMP_I:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %v1, <2 x double> %v2, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmgt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-UNCONSTRAINED: fcmgt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> // COMMONIR: ret <2 x i64> [[SEXT_I]] uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { @@ -485,7 +526,7 @@ // COMMONIR: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1 // UNCONSTRAINED: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]] // 
CONSTRAINED: [[VPADDD_I:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[LANE0_I]], float [[LANE1_I]], metadata !"round.tonearest", metadata !"fpexcept.strict" -// CHECK-ASM: fadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM: faddp s{{[0-9]+}}, v{{[0-9]+}}.2s // COMMONIR: ret float [[VPADDD_I]] float32_t test_vpadds_f32(float32x2_t a) { return vpadds_f32(a); @@ -505,7 +546,7 @@ // COMMON-LABEL: test_vcvts_f32_s32 // UNCONSTRAINED: [[TMP0:%.*]] = sitofp i32 %a to float // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: scvtf s{{[0-9]+}}, w{{[0-9]+}} +// CHECK-ASM: scvtf s{{[0-9]+}}, {{s|w}}{{[0-9]+}} // COMMONIR: ret float [[TMP0]] float32_t test_vcvts_f32_s32(int32_t a) { return vcvts_f32_s32(a); @@ -514,7 +555,7 @@ // COMMON-LABEL: test_vcvtd_f64_s64 // UNCONSTRAINED: [[TMP0:%.*]] = sitofp i64 %a to double // CONSTRAINED: [[TMP0:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: scvtf d{{[0-9]}}, x{{[0-9]+}} +// CHECK-ASM: scvtf d{{[0-9]}}, {{d|x}}{{[0-9]+}} // COMMONIR: ret double [[TMP0]] float64_t test_vcvtd_f64_s64(int64_t a) { return vcvtd_f64_s64(a); @@ -523,7 +564,7 @@ // COMMON-LABEL: test_vcvts_f32_u32 // UNCONSTRAINED: [[TMP0:%.*]] = uitofp i32 %a to float // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: ucvtf s{{[0-9]+}}, w{{[0-9]+}} +// CHECK-ASM: ucvtf s{{[0-9]+}}, {{s|w}}{{[0-9]+}} // COMMONIR: ret float [[TMP0]] float32_t test_vcvts_f32_u32(uint32_t a) { return vcvts_f32_u32(a); @@ -533,7 +574,7 @@ // COMMON-LABEL: test_vcvtd_f64_u64 // UNCONSTRAINED: [[TMP0:%.*]] = uitofp i64 %a to double // CONSTRAINED: [[TMP0:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata 
!"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: ucvtf d{{[0-9]}}, x{{[0-9]+}} +// CHECK-ASM: ucvtf d{{[0-9]}}, {{d|x}}{{[0-9]+}} // COMMONIR: ret double [[TMP0]] float64_t test_vcvtd_f64_u64(uint64_t a) { return vcvtd_f64_u64(a); @@ -585,8 +626,9 @@ // COMMON-LABEL: test_vcges_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge float %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ge // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCMPD_I]] @@ -596,8 +638,9 @@ // COMMON-LABEL: test_vcged_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge double %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ge // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCMPD_I]] @@ -607,8 +650,9 @@ // COMMON-LABEL: test_vcgezs_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 
@llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ge // COMMONIR: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCGEZ_I]] @@ -618,8 +662,9 @@ // COMMON-LABEL: test_vcgezd_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"oge", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ge // COMMONIR: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCGEZ_I]] @@ -629,8 +674,9 @@ // COMMON-LABEL: test_vcgts_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt float %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, gt // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCMPD_I]] @@ -640,8 +686,9 @@ // COMMON-LABEL: test_vcgtd_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt double %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") -// 
CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, gt // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCMPD_I]] @@ -651,8 +698,9 @@ // COMMON-LABEL: test_vcgtzs_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, gt // COMMONIR: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCGTZ_I]] @@ -662,8 +710,9 @@ // COMMON-LABEL: test_vcgtzd_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"ogt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, gt // COMMONIR: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCGTZ_I]] @@ -673,8 +722,9 @@ // COMMON-LABEL: test_vcles_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole float %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ls // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCMPD_I]] @@ -684,8 +734,9 @@ // COMMON-LABEL: test_vcled_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole double %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ls // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCMPD_I]] @@ -695,8 +746,9 @@ // COMMON-LABEL: test_vclezs_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ls // COMMONIR: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCLEZ_I]] @@ -706,8 +758,9 @@ // COMMON-LABEL: test_vclezd_f64 // 
UNCONSTRAINED: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"ole", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, ls // COMMONIR: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCLEZ_I]] @@ -717,8 +770,9 @@ // COMMON-LABEL: test_vclts_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt float %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, s{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, s{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, mi // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCMPD_I]] @@ -728,8 +782,9 @@ // COMMON-LABEL: test_vcltd_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt double %a, %b -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, d{{[0-9]+}} +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, d{{[0-9]+}} // CHECK-ASM-NEXT:cset {{w[0-9]+}}, mi // COMMONIR: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 
// COMMONIR: ret i64 [[VCMPD_I]] @@ -739,8 +794,9 @@ // COMMON-LABEL: test_vcltzs_f32 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp s{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp s{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe s{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, mi // COMMONIR: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32 // COMMONIR: ret i32 [[VCLTZ_I]] @@ -750,8 +806,9 @@ // COMMON-LABEL: test_vcltzd_f64 // UNCONSTRAINED: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00 -// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") -// CHECK-ASM: fcmp d{{[0-9]+}}, #0.0 +// CONSTRAINED: [[TMP0:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double 0.000000e+00, metadata !"olt", metadata !"fpexcept.strict") +// CHECK-ASM-UNCONSTRAINED: fcmp d{{[0-9]+}}, #0.0 +// CHECK-ASM-CONSTRAINED: fcmpe d{{[0-9]+}}, #0.0 // CHECK-ASM-NEXT:cset {{w[0-9]+}}, mi // COMMONIR: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 // COMMONIR: ret i64 [[VCLTZ_I]] @@ -847,9 +904,8 @@ // COMMON-LABEL: test_vcvt_s64_f64 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// UNCONSTRAINED: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64> -// CONSTRAINED: [[TMP1:%.*]] = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double> %a, metadata !"fpexcept.strict") -// CHECK-ASM: fcvtzs x{{[0-9]+}}, d{{[0-9]+}} +// COMMONIR: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a) +// CHECK-ASM: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: ret <1 x i64> [[TMP1]] 
int64x1_t test_vcvt_s64_f64(float64x1_t a) { return vcvt_s64_f64(a); @@ -857,9 +913,8 @@ // COMMON-LABEL: test_vcvt_u64_f64 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// UNCONSTRAINED: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64> -// CONSTRAINED: [[TMP1:%.*]] = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double> %a, metadata !"fpexcept.strict") -// CHECK-ASM: fcvtzu x{{[0-9]+}}, d{{[0-9]+}} +// COMMONIR: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a) +// CHECK-ASM: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} // COMMONIR: ret <1 x i64> [[TMP1]] uint64x1_t test_vcvt_u64_f64(float64x1_t a) { return vcvt_u64_f64(a); @@ -869,7 +924,7 @@ // COMMONIR: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // UNCONSTRAINED: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double> // CONSTRAINED: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: scvtf d{{[0-9]+}}, x{{[0-9]+}} +// CHECK-ASM: scvtf d{{[0-9]+}}, {{d|x}}{{[0-9]+}} // COMMONIR: ret <1 x double> [[VCVT_I]] float64x1_t test_vcvt_f64_s64(int64x1_t a) { return vcvt_f64_s64(a); @@ -879,7 +934,7 @@ // COMMONIR: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // UNCONSTRAINED: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double> // CONSTRAINED: [[VCVT_I:%.*]] = call <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -// CHECK-ASM: ucvtf d{{[0-9]+}}, x{{[0-9]+}} +// CHECK-ASM: ucvtf d{{[0-9]+}}, {{d|x}}{{[0-9]+}} // COMMONIR: ret <1 x double> [[VCVT_I]] float64x1_t test_vcvt_f64_u64(uint64x1_t a) { return vcvt_f64_u64(a); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ 
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -231,9 +231,16 @@ // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { SDValue Oper = N->getOperand(i); + EVT OperVT = Oper.getValueType(); - if (Oper.getValueType().isVector()) - Oper = GetScalarizedVector(Oper); + if (OperVT.isVector()) { + if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector) + Oper = GetScalarizedVector(Oper); + else + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperVT.getVectorElementType(), Oper, + DAG.getVectorIdxConstant(0, dl)); + } Opers[i] = Oper; } Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1477,6 +1477,17 @@ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64) setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); + // Signalling comparison has to be expanded as there's no signalling version + // of the vector comparison instructions. + setOperationAction(ISD::STRICT_FSETCCS, VT, Expand); + // FIXME: We could potentially make use of the vector comparison instructions + // for STRICT_FSETCC, but some kinds of comparison require more than one + // FCM instruction which wouldn't be valid so would need to get expanded + // instead. The lowering also involves target-specific ISD nodes so we would + // likely need to add strict versions of all of them and handle them + // appropriately. + setOperationAction(ISD::STRICT_FSETCC, VT, Expand); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -3377,7 +3388,8 @@ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. 
- EVT InVT = Op.getOperand(0).getValueType(); + bool IsStrict = Op->isStrictFPOpcode(); + EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType(); EVT VT = Op.getValueType(); if (VT.isScalableVector()) { @@ -3397,6 +3409,13 @@ !Subtarget->hasFullFP16()) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {NewVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); @@ -3406,6 +3425,13 @@ uint64_t InVTSize = InVT.getFixedSizeInBits(); if (VTSize < InVTSize) { SDLoc dl(Op); + if (IsStrict) { + InVT = InVT.changeVectorElementTypeToInteger(); + SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); + return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl); + } SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); @@ -3417,10 +3443,33 @@ MVT ExtVT = MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); + if (IsStrict) { + SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {ExtVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, + {Ext.getValue(1), Ext.getValue(0)}); + } SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. + if (NumElts == 1) { + SDLoc dl(Op); + SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getScalarType(), + Op.getOperand(IsStrict ? 
1 : 0), + DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + // Note: the strict variant returns {result, chain}. + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + // Type changing conversions are illegal. return Op; } @@ -3583,9 +3632,10 @@ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. + bool IsStrict = Op->isStrictFPOpcode(); EVT VT = Op.getValueType(); SDLoc dl(Op); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); EVT InVT = In.getValueType(); unsigned Opc = Op.getOpcode(); bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; @@ -3613,6 +3663,13 @@ MVT CastVT = MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); + if (IsStrict) { + In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, + {Op.getOperand(0), Op.getOperand(1)}); + return DAG.getNode( + ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, + {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)}); + } In = DAG.getNode(Opc, dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } @@ -3621,9 +3678,28 @@ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; EVT CastVT = VT.changeVectorElementTypeToInteger(); In = DAG.getNode(CastOpc, dl, CastVT, In); + if (IsStrict) + return DAG.getNode(Opc, dl, {VT, MVT::Other}, + {Op.getOperand(0), In}); return DAG.getNode(Opc, dl, VT, In); } + // Use a scalar operation for conversions between single-element vectors of + // the same size. + if (VT.getVectorNumElements() == 1) { + SDLoc dl(Op); + SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getScalarType(), + Op.getOperand(IsStrict ? 
1 : 0), + DAG.getConstant(0, dl, MVT::i64)); + EVT ScalarVT = VT.getScalarType(); + // Note: the strict variant returns {result, chain}. + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other}, + {Op.getOperand(0), Extract}); + return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract); + } + return Op; }