Index: clang/include/clang/Basic/BuiltinsRISCV.def
===================================================================
--- clang/include/clang/Basic/BuiltinsRISCV.def
+++ clang/include/clang/Basic/BuiltinsRISCV.def
@@ -79,5 +79,13 @@
 TARGET_BUILTIN(__builtin_riscv_sm3p0, "LiLi", "nc", "zksh")
 TARGET_BUILTIN(__builtin_riscv_sm3p1, "LiLi", "nc", "zksh")
 
+// Zfa extension
+TARGET_BUILTIN(__builtin_riscv_fmaxm_d, "ddd", "nc", "d,experimental-zfa")
+TARGET_BUILTIN(__builtin_riscv_fmaxm_s, "fff", "nc", "experimental-zfa")
+TARGET_BUILTIN(__builtin_riscv_fmaxm_h, "xxx", "nc", "zfh,experimental-zfa")
+TARGET_BUILTIN(__builtin_riscv_fminm_d, "ddd", "nc", "d,experimental-zfa")
+TARGET_BUILTIN(__builtin_riscv_fminm_s, "fff", "nc", "experimental-zfa")
+TARGET_BUILTIN(__builtin_riscv_fminm_h, "xxx", "nc", "zfh,experimental-zfa")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2362,6 +2362,24 @@
                                    Intrinsic::minnum,
                                    Intrinsic::experimental_constrained_minnum));
 
+  // Zfa
+  // NOTE(review): RISCV::BI* values numerically overlap other targets'
+  // builtin IDs, so target-specific cases in this generic switch can fire
+  // for other targets -- these likely belong in EmitRISCVBuiltinExpr.
+  case RISCV::BI__builtin_riscv_fmaxm_d:
+  case RISCV::BI__builtin_riscv_fmaxm_s:
+  case RISCV::BI__builtin_riscv_fmaxm_h:
+    return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+                                   Intrinsic::maximum,
+                                   Intrinsic::experimental_constrained_maximum));
+
+  case RISCV::BI__builtin_riscv_fminm_d:
+  case RISCV::BI__builtin_riscv_fminm_s:
+  case RISCV::BI__builtin_riscv_fminm_h:
+    return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
+                                   Intrinsic::minimum,
+                                   Intrinsic::experimental_constrained_minimum));
+
   // fmod() is a special-case. It maps to the frem instruction rather than an
   // LLVM intrinsic.
   case Builtin::BIfmod:
Index: clang/test/CodeGen/RISCV/zfa-intrinsics.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/RISCV/zfa-intrinsics.c
@@ -0,0 +1,167 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-feature +zfh -target-feature +experimental-zfa -emit-llvm %s -o - \
+// RUN: | FileCheck %s -check-prefix=RV64ZFA
+// RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-feature +zfh -target-feature +experimental-zfa -emit-llvm %s -o - \
+// RUN: | FileCheck %s -check-prefix=RV32ZFA
+
+// RV64ZFA-LABEL: @fmaxm_s(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// RV64ZFA-NEXT:    store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64ZFA-NEXT:    store float [[B:%.*]], ptr [[B_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call float @llvm.maximum.f32(float [[TMP0]], float [[TMP1]])
+// RV64ZFA-NEXT:    ret float [[TMP2]]
+//
+// RV32ZFA-LABEL: @fmaxm_s(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// RV32ZFA-NEXT:    store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32ZFA-NEXT:    store float [[B:%.*]], ptr [[B_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call float @llvm.maximum.f32(float [[TMP0]], float [[TMP1]])
+// RV32ZFA-NEXT:    ret float [[TMP2]]
+//
+float fmaxm_s(float a, float b)
+{
+  return __builtin_riscv_fmaxm_s(a, b);
+}
+
+// RV64ZFA-LABEL: @fminm_s(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// RV64ZFA-NEXT:    store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV64ZFA-NEXT:    store float [[B:%.*]], ptr [[B_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call float @llvm.minimum.f32(float [[TMP0]], float [[TMP1]])
+// RV64ZFA-NEXT:    ret float [[TMP2]]
+//
+// RV32ZFA-LABEL: @fminm_s(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// RV32ZFA-NEXT:    store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// RV32ZFA-NEXT:    store float [[B:%.*]], ptr [[B_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call float @llvm.minimum.f32(float [[TMP0]], float [[TMP1]])
+// RV32ZFA-NEXT:    ret float [[TMP2]]
+//
+float fminm_s(float a, float b)
+{
+  return __builtin_riscv_fminm_s(a, b);
+}
+
+// RV64ZFA-LABEL: @fmaxm_d(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// RV64ZFA-NEXT:    store double [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64ZFA-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call double @llvm.maximum.f64(double [[TMP0]], double [[TMP1]])
+// RV64ZFA-NEXT:    ret double [[TMP2]]
+//
+// RV32ZFA-LABEL: @fmaxm_d(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// RV32ZFA-NEXT:    store double [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV32ZFA-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call double @llvm.maximum.f64(double [[TMP0]], double [[TMP1]])
+// RV32ZFA-NEXT:    ret double [[TMP2]]
+//
+double fmaxm_d(double a, double b)
+{
+  return __builtin_riscv_fmaxm_d(a, b);
+}
+
+// RV64ZFA-LABEL: @fminm_d(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// RV64ZFA-NEXT:    store double [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV64ZFA-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call double @llvm.minimum.f64(double [[TMP0]], double [[TMP1]])
+// RV64ZFA-NEXT:    ret double [[TMP2]]
+//
+// RV32ZFA-LABEL: @fminm_d(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// RV32ZFA-NEXT:    store double [[A:%.*]], ptr [[A_ADDR]], align 8
+// RV32ZFA-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call double @llvm.minimum.f64(double [[TMP0]], double [[TMP1]])
+// RV32ZFA-NEXT:    ret double [[TMP2]]
+//
+double fminm_d(double a, double b)
+{
+  return __builtin_riscv_fminm_d(a, b);
+}
+
+// RV64ZFA-LABEL: @fmaxm_h(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// RV64ZFA-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// RV64ZFA-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call half @llvm.maximum.f16(half [[TMP0]], half [[TMP1]])
+// RV64ZFA-NEXT:    ret half [[TMP2]]
+//
+// RV32ZFA-LABEL: @fmaxm_h(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// RV32ZFA-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// RV32ZFA-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call half @llvm.maximum.f16(half [[TMP0]], half [[TMP1]])
+// RV32ZFA-NEXT:    ret half [[TMP2]]
+//
+_Float16 fmaxm_h(_Float16 a, _Float16 b)
+{
+  return __builtin_riscv_fmaxm_h(a, b);
+}
+
+// RV64ZFA-LABEL: @fminm_h(
+// RV64ZFA-NEXT:  entry:
+// RV64ZFA-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// RV64ZFA-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// RV64ZFA-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// RV64ZFA-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// RV64ZFA-NEXT:    [[TMP2:%.*]] = call half @llvm.minimum.f16(half [[TMP0]], half [[TMP1]])
+// RV64ZFA-NEXT:    ret half [[TMP2]]
+//
+// RV32ZFA-LABEL: @fminm_h(
+// RV32ZFA-NEXT:  entry:
+// RV32ZFA-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// RV32ZFA-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// RV32ZFA-NEXT:    store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// RV32ZFA-NEXT:    store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// RV32ZFA-NEXT:    [[TMP2:%.*]] = call half @llvm.minimum.f16(half [[TMP0]], half [[TMP1]])
+// RV32ZFA-NEXT:    ret half [[TMP2]]
+//
+_Float16 fminm_h(_Float16 a, _Float16 b)
+{
+  return __builtin_riscv_fminm_h(a, b);
+}