diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -61,5 +61,15 @@ TARGET_BUILTIN(__builtin_riscv_crc32_d, "LiLi", "nc", "experimental-zbr,64bit") TARGET_BUILTIN(__builtin_riscv_crc32c_d, "LiLi", "nc", "experimental-zbr,64bit") +// Zbt extension +TARGET_BUILTIN(__builtin_riscv_cmov, "LiLiLiLi", "nc", "experimental-zbt") +TARGET_BUILTIN(__builtin_riscv_cmix, "LiLiLiLi", "nc", "experimental-zbt") +TARGET_BUILTIN(__builtin_riscv_fsl_32, "LiLiLiLi", "nc", "experimental-zbt") +TARGET_BUILTIN(__builtin_riscv_fsr_32, "LiLiLiLi", "nc", "experimental-zbt") +TARGET_BUILTIN(__builtin_riscv_fsri_32, "LiLiLiLi", "nc", "experimental-zbt") +TARGET_BUILTIN(__builtin_riscv_fsl_64, "WiWiWiWi", "nc", "experimental-zbt,64bit") +TARGET_BUILTIN(__builtin_riscv_fsr_64, "WiWiWiWi", "nc", "experimental-zbt,64bit") +TARGET_BUILTIN(__builtin_riscv_fsri_64, "WiWiWiWi", "nc", "experimental-zbt,64bit") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18853,7 +18853,15 @@ case RISCV::BI__builtin_riscv_crc32c_b: case RISCV::BI__builtin_riscv_crc32c_h: case RISCV::BI__builtin_riscv_crc32c_w: - case RISCV::BI__builtin_riscv_crc32c_d: { + case RISCV::BI__builtin_riscv_crc32c_d: + case RISCV::BI__builtin_riscv_cmov: + case RISCV::BI__builtin_riscv_cmix: + case RISCV::BI__builtin_riscv_fsl_32: + case RISCV::BI__builtin_riscv_fsr_32: + case RISCV::BI__builtin_riscv_fsri_32: + case RISCV::BI__builtin_riscv_fsl_64: + case RISCV::BI__builtin_riscv_fsr_64: + case RISCV::BI__builtin_riscv_fsri_64: { switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); // Zbb @@ -18944,6 +18952,26 @@ case RISCV::BI__builtin_riscv_crc32c_d: ID = Intrinsic::riscv_crc32c_d; break; + + // Zbt + case RISCV::BI__builtin_riscv_cmov: + ID = Intrinsic::riscv_cmov; + break; + case RISCV::BI__builtin_riscv_cmix: + ID = Intrinsic::riscv_cmix; + break; + case RISCV::BI__builtin_riscv_fsl_32: + case RISCV::BI__builtin_riscv_fsl_64: + ID = Intrinsic::riscv_fsl; + break; + case RISCV::BI__builtin_riscv_fsr_32: + case RISCV::BI__builtin_riscv_fsr_64: + ID = Intrinsic::riscv_fsr; + break; + case RISCV::BI__builtin_riscv_fsri_32: + case RISCV::BI__builtin_riscv_fsri_64: + ID = Intrinsic::riscv_fsri; + break; } IntrinsicTypes = {ResultType}; diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbt.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbt.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbt.c @@ -0,0 +1,93 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zbt -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV32ZBT + +// RV32ZBT-LABEL: @cmov( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// 
RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.cmov.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// +int cmov(int rs1, int rs2, int rs3) { + return __builtin_riscv_cmov(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @cmix( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.cmix.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// +int cmix(int rs1, int rs2, int rs3) { + return __builtin_riscv_cmix(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @fsl( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.fsl.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// +int fsl(int rs1, int rs2, int rs3) { + return __builtin_riscv_fsl_32(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @fsr( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.fsr.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// +int fsr(int rs1, int rs2, int rs3) { + return __builtin_riscv_fsr_32(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @fsri( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 13, i32* [[I]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.fsri.i32(i32 [[TMP0]], i32 [[TMP1]], i32 13) +// 
RV32ZBT-NEXT: ret i32 [[TMP2]] +// +int fsri(int rs1, int rs2) { + const int i = 13; + return __builtin_riscv_fsri_32(rs1, rs2, i); +} diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbt.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbt.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbt.c @@ -0,0 +1,219 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zbt -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV64ZBT + +// RV64ZBT-LABEL: @cmov( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV2:%.*]] = sext i32 [[TMP2]] to i64 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.cmov.i64(i64 [[CONV]], i64 [[CONV1]], i64 [[CONV2]]) +// RV64ZBT-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 +// RV64ZBT-NEXT: ret i32 [[CONV3]] +// +int cmov(int rs1, int rs2, int rs3) { + return __builtin_riscv_cmov(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @cmix( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.cmix.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// RV64ZBT-LABEL: @cmix( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV2:%.*]] = sext i32 [[TMP2]] to i64 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.cmix.i64(i64 [[CONV]], i64 [[CONV1]], i64 [[CONV2]]) +// RV64ZBT-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 +// RV64ZBT-NEXT: ret i32 [[CONV3]] +// +int cmix(int rs1, int rs2, int rs3) { + return __builtin_riscv_cmix(rs1, rs2, rs3); 
+} + +// RV32ZBT-LABEL: @fsl( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.fsl.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// RV64ZBT-LABEL: @fsl( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV2:%.*]] = sext i32 [[TMP2]] to i64 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.fsl.i64(i64 [[CONV]], i64 [[CONV1]], i64 [[CONV2]]) +// RV64ZBT-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 +// RV64ZBT-NEXT: ret i32 [[CONV3]] +// +int fsl(int rs1, int rs2, int rs3) { + return __builtin_riscv_fsl_32(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @fsr( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.fsr.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// RV64ZBT-LABEL: @fsr( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV2:%.*]] = sext i32 [[TMP2]] to i64 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.fsr.i64(i64 [[CONV]], i64 [[CONV1]], i64 [[CONV2]]) +// RV64ZBT-NEXT: 
[[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 +// RV64ZBT-NEXT: ret i32 [[CONV3]] +// +int fsr(int rs1, int rs2, int rs3) { + return __builtin_riscv_fsr_32(rs1, rs2, rs3); +} + +// RV32ZBT-LABEL: @fsri( +// RV32ZBT-NEXT: entry: +// RV32ZBT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBT-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// RV32ZBT-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.fsri.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// RV32ZBT-NEXT: ret i32 [[TMP3]] +// RV64ZBT-LABEL: @fsri( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBT-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: store i32 [[RS3:%.*]], i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i32, i32* [[RS3_ADDR]], align 4 +// RV64ZBT-NEXT: [[CONV2:%.*]] = sext i32 [[TMP2]] to i64 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.fsri.i64(i64 [[CONV]], i64 [[CONV1]], i64 [[CONV2]]) +// RV64ZBT-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 +// RV64ZBT-NEXT: ret i32 [[CONV3]] +// +int fsri(int rs1, int rs2, int rs3) { + return __builtin_riscv_fsri_32(rs1, rs2, rs3); +} + +// RV64ZBT-LABEL: @fslw( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBT-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBT-NEXT: store i64 [[RS3:%.*]], i64* [[RS3_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i64, i64* [[RS3_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.fsl.i64(i64 [[TMP0]], i64 [[TMP1]], i64 [[TMP2]]) +// RV64ZBT-NEXT: ret i64 [[TMP3]] +// +long fslw(long rs1, long rs2, long rs3) { + return __builtin_riscv_fsl_64(rs1, rs2, rs3); +} + +// RV64ZBT-LABEL: @fsrw( +// RV64ZBT-NEXT: entry: +// RV64ZBT-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: [[RS3_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBT-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBT-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBT-NEXT: store i64 [[RS3:%.*]], i64* [[RS3_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBT-NEXT: [[TMP2:%.*]] = load i64, i64* [[RS3_ADDR]], align 8 +// 
RV64ZBT-NEXT:    [[TMP3:%.*]] = call i64 @llvm.riscv.fsr.i64(i64 [[TMP0]], i64 [[TMP1]], i64 [[TMP2]])
+// RV64ZBT-NEXT:    ret i64 [[TMP3]]
+//
+long fsrw(long rs1, long rs2, long rs3) {
+  return __builtin_riscv_fsr_64(rs1, rs2, rs3);
+}
+
+// RV64ZBT-LABEL: @fsriw(
+// RV64ZBT-NEXT:  entry:
+// RV64ZBT-NEXT:    [[RS1_ADDR:%.*]] = alloca i64, align 8
+// RV64ZBT-NEXT:    [[RS2_ADDR:%.*]] = alloca i64, align 8
+// RV64ZBT-NEXT:    [[I:%.*]] = alloca i32, align 4
+// RV64ZBT-NEXT:    store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8
+// RV64ZBT-NEXT:    store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8
+// RV64ZBT-NEXT:    store i32 13, i32* [[I]], align 4
+// RV64ZBT-NEXT:    [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8
+// RV64ZBT-NEXT:    [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8
+// RV64ZBT-NEXT:    [[TMP2:%.*]] = call i64 @llvm.riscv.fsri.i64(i64 [[TMP0]], i64 [[TMP1]], i64 13)
+// RV64ZBT-NEXT:    ret i64 [[TMP2]]
+//
+long fsriw(long rs1, long rs2) {
+  const int i = 13;
+  return __builtin_riscv_fsri_64(rs1, rs2, i);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -80,6 +80,10 @@
       : Intrinsic<[llvm_any_ty],
                   [LLVMMatchType<0>, LLVMMatchType<0>],
                   [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+  class BitManipGPRGPRGRIntrinsics
+      : Intrinsic<[llvm_any_ty],
+                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                  [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

   // Zbb
   def int_riscv_orc_b : BitManipGPRIntrinsics;
@@ -115,6 +119,13 @@
   def int_riscv_crc32c_h : BitManipGPRIntrinsics;
   def int_riscv_crc32c_w : BitManipGPRIntrinsics;
   def int_riscv_crc32c_d : BitManipGPRIntrinsics;
+
+  // Zbt
+  def int_riscv_cmov : BitManipGPRGPRGRIntrinsics;
+  def int_riscv_cmix : BitManipGPRGPRGRIntrinsics;
+  def int_riscv_fsl : BitManipGPRGPRGRIntrinsics;
+  def int_riscv_fsr : BitManipGPRGPRGRIntrinsics;
+  def int_riscv_fsri : BitManipGPRGPRGRIntrinsics;
 } // TargetPrefix = "riscv"

 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -118,6 +118,9 @@
 def SDTIntShiftDOp: SDTypeProfile<1, 3, [   // fshl, fshr
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
 ]>;
+def SDTIntCondOp: SDTypeProfile<1, 3, [   // cmov, cmix
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisInt<2>
+]>;
 def SDTIntSatNoShOp : SDTypeProfile<1, 2, [   // ssat with no shift
   SDTCisSameAs<0, 1>, SDTCisInt<2>
 ]>;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -62,6 +62,10 @@
   // named RISC-V instructions.
   CLZW,
   CTZW,
+  // RV32IB/RV64IB conditional instructions directly matching the semantics of
+  // the named RISC-V instructions.
+  CMOV,
+  CMIX,
   // RV64IB/RV32IB funnel shifts, with the semantics of the named RISC-V
   // instructions, but the same operand order as fshl/fshr intrinsics.
FSR, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4242,6 +4242,18 @@ case Intrinsic::riscv_bfp: return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::riscv_cmov: + return DAG.getNode(RISCVISD::CMOV, DL, XLenVT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::riscv_cmix: + return DAG.getNode(RISCVISD::CMIX, DL, XLenVT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::riscv_fsl: + return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::riscv_fsr: + return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); case Intrinsic::riscv_vmv_x_s: assert(Op.getValueType() == XLenVT && "Unexpected VT!"); return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), @@ -5825,6 +5837,10 @@ return RISCVISD::BDECOMPRESSW; case Intrinsic::riscv_bfp: return RISCVISD::BFPW; + case Intrinsic::riscv_fsl: + return RISCVISD::FSLW; + case Intrinsic::riscv_fsr: + return RISCVISD::FSRW; } } @@ -6310,6 +6326,21 @@ Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo)); break; } + case Intrinsic::riscv_fsl: + case Intrinsic::riscv_fsr: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + SDValue NewOp3 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)); + unsigned Opc = getRISCVWOpcodeByIntr(IntNo); + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + break; + } case Intrinsic::riscv_orc_b: { // Lower to the GORCI encoding for orc.b with the operand extended. 
SDValue NewOp =
@@ -9772,6 +9803,8 @@
   NODE_NAME_CASE(BCOMPRESSW)
   NODE_NAME_CASE(BDECOMPRESS)
   NODE_NAME_CASE(BDECOMPRESSW)
+  NODE_NAME_CASE(CMOV)
+  NODE_NAME_CASE(CMIX)
   NODE_NAME_CASE(VMV_V_X_VL)
   NODE_NAME_CASE(VFMV_V_F_VL)
   NODE_NAME_CASE(VMV_X_S)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -995,6 +995,8 @@
     : Pat<(OpNode GPR:$rs1), (Inst GPR:$rs1)>;
 class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
     : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGprGpr<SDPatternOperator OpNode, RVInst Inst>
+    : Pat<(OpNode GPR:$rs1, GPR:$rs2, GPR:$rs3), (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
 class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
     : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -51,6 +51,8 @@
 def riscv_bcompressw : SDNode<"RISCVISD::BCOMPRESSW", SDT_RISCVIntBinOpW>;
 def riscv_bdecompress : SDNode<"RISCVISD::BDECOMPRESS", SDTIntBinOp>;
 def riscv_bdecompressw : SDNode<"RISCVISD::BDECOMPRESSW",SDT_RISCVIntBinOpW>;
+def riscv_cmov : SDNode<"RISCVISD::CMOV", SDTIntCondOp>;
+def riscv_cmix : SDNode<"RISCVISD::CMIX", SDTIntCondOp>;

 def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
   let Name = "UImmLog2XLenHalf";
@@ -861,6 +863,11 @@
 } // Predicates = [HasStdExtZbp, IsRV64]

 let Predicates = [HasStdExtZbt] in {
+def : Pat<(riscv_cmov GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_cmix GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
 def : Pat<(or (and (not GPR:$rs2), GPR:$rs3),
               (and GPR:$rs2, GPR:$rs1)),
           (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
@@ -897,6 +904,8 @@
           (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
 def : Pat<(riscv_fsr GPR:$rs3, GPR:$rs1, GPR:$rs2),
           (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+          (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
 def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
           (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
@@ -1138,4 +1147,4 @@
 def : PatGprGpr<riscv_bfp, BFP>;

 let Predicates = [HasStdExtZbf, IsRV64] in
-def : PatGprGpr<riscv_bfpw, BFPW>;
+def : PatGprGpr<riscv_bfpw, BFPW>;
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32zbt-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbt-intrinsic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbt-intrinsic.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBT
+
+declare i32 @llvm.riscv.cmov.i32(i32 %a, i32 %b, i32 %c)
+
+define i32 @cmov(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32ZBT-LABEL: cmov:
+; RV32ZBT:       # %bb.0:
+; RV32ZBT-NEXT:    cmov a0, a2, a0, a1
+; RV32ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cmov.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cmix.i32(i32 %a, i32 %b, i32 %c)
+
+define i32 @cmix(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32ZBT-LABEL: cmix:
+; RV32ZBT:       # %bb.0:
+; RV32ZBT-NEXT:    cmix a0, a2, a0, a1
+; RV32ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cmix.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.fsl.i32(i32, i32, i32)
+
+define i32 @fsl_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32ZBT-LABEL: fsl_i32:
+; RV32ZBT:       # %bb.0:
+; RV32ZBT-NEXT:    fsl a0, a0, a1, a2
+; RV32ZBT-NEXT:    ret
+  %1 = call i32 
@llvm.riscv.fsl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.fsr.i32(i32, i32, i32)
+
+define i32 @fsr_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV32ZBT-LABEL: fsr_i32:
+; RV32ZBT:       # %bb.0:
+; RV32ZBT-NEXT:    fsr a0, a1, a0, a2
+; RV32ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.fsr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+define i32 @fsri_i32(i32 %a, i32 %b) nounwind {
+; RV32ZBT-LABEL: fsri_i32:
+; RV32ZBT:       # %bb.0:
+; RV32ZBT-NEXT:    fsri a0, a1, a0, 5
+; RV32ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.fsr.i32(i32 %a, i32 %b, i32 5)
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbt-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbt-intrinsic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbt-intrinsic.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBT
+
+declare i32 @llvm.riscv.fsl.i32(i32, i32, i32)
+
+define i32 @fsl_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV64ZBT-LABEL: fsl_i32:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fslw a0, a0, a1, a2
+; RV64ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.fsl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.fsr.i32(i32, i32, i32)
+
+define i32 @fsr_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; RV64ZBT-LABEL: fsr_i32:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fsrw a0, a1, a0, a2
+; RV64ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.fsr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %1
+}
+
+define i32 @fsri_i32(i32 %a, i32 %b) nounwind {
+; RV64ZBT-LABEL: fsri_i32:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fsriw a0, a1, a0, 5
+; RV64ZBT-NEXT:    ret
+  %1 = call i32 @llvm.riscv.fsr.i32(i32 %a, i32 %b, i32 5)
+  ret i32 %1
+}
+
+declare i64 @llvm.riscv.fsl.i64(i64, i64, i64)
+
+define i64 @fsl_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64ZBT-LABEL: fsl_i64:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fsl a0, a0, a1, a2
+; RV64ZBT-NEXT:    ret
+  %1 = call i64 @llvm.riscv.fsl.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+declare i64 @llvm.riscv.fsr.i64(i64, i64, i64)
+
+define i64 @fsr_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; RV64ZBT-LABEL: fsr_i64:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fsr a0, a1, a0, a2
+; RV64ZBT-NEXT:    ret
+  %1 = call i64 @llvm.riscv.fsr.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %1
+}
+
+define i64 @fsri_i64(i64 %a, i64 %b) nounwind {
+; RV64ZBT-LABEL: fsri_i64:
+; RV64ZBT:       # %bb.0:
+; RV64ZBT-NEXT:    fsri a0, a1, a0, 5
+; RV64ZBT-NEXT:    ret
+  %1 = call i64 @llvm.riscv.fsr.i64(i64 %a, i64 %b, i64 5)
+  ret i64 %1
+}
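
Illustrative usage (editor's note, not part of the patch): a minimal sketch of how the new Zbt builtins could be driven from C, assuming a clang built with this change and the experimental-zbt target feature enabled. The helper names rotl32 and bitselect are hypothetical; the comments paraphrase the draft Bitmanip Zbt semantics and the operand order exercised by the tests above.

    // Funnel-shifting a value with itself rotates it: x is rotated left by
    // the low five bits of n. On riscv32 with +experimental-zbt this is
    // expected to lower to a single fsl instruction.
    int rotl32(int x, int n) {
      return __builtin_riscv_fsl_32(x, x, n);
    }

    // Bitwise select, (a & mask) | (b & ~mask): per the cmix pattern and the
    // llc test above, the mask travels in the third builtin operand and ends
    // up in the instruction's rs2 field.
    int bitselect(int a, int b, int mask) {
      return __builtin_riscv_cmix(a, b, mask);
    }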