diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -99,6 +99,7 @@ ) set(riscv_files + riscv_crypto.h riscv_ntlh.h ) diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkb.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkb.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkb.c @@ -2,43 +2,112 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zbkb -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZBKB -#include +#include // RV32ZBKB-LABEL: @brev8( // RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV32ZBKB-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.brev8.i32(i32 [[TMP0]]) -// RV32ZBKB-NEXT: ret i32 [[TMP1]] +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.brev8.i32(i32 [[TMP1]]) +// RV32ZBKB-NEXT: ret i32 [[TMP2]] // uint32_t brev8(uint32_t rs1) { - return __builtin_riscv_brev8_32(rs1); + return __riscv_brev8_32(rs1); } // RV32ZBKB-LABEL: @zip( // RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV32ZBKB-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.zip.i32(i32 [[TMP0]]) -// RV32ZBKB-NEXT: ret i32 [[TMP1]] +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[CALL_I:%.*]] = call i32 @__builtin_riscv_zip32(i32 noundef [[TMP1]]) #[[ATTR3:[0-9]+]] +// RV32ZBKB-NEXT: ret i32 [[CALL_I]] // uint32_t zip(uint32_t rs1) { - return __builtin_riscv_zip_32(rs1); + return __riscv_zip_32(rs1); } // RV32ZBKB-LABEL: @unzip( // RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV32ZBKB-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.unzip.i32(i32 [[TMP0]]) -// RV32ZBKB-NEXT: ret i32 [[TMP1]] +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[CALL_I:%.*]] = call i32 @__builtin_riscv_unzip32(i32 noundef [[TMP1]]) #[[ATTR3]] +// RV32ZBKB-NEXT: ret i32 [[CALL_I]] // uint32_t unzip(uint32_t rs1) { - return __builtin_riscv_unzip_32(rs1); + return __riscv_unzip_32(rs1); +} + +// RV32ZBKB-LABEL: @rev8( +// RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) 
+// RV32ZBKB-NEXT: ret i32 [[TMP2]] +// +uint32_t rev8(uint32_t rs1) +{ + return __riscv_rev8_32(rs1); +} + +// RV32ZBKB-LABEL: @rol( +// RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP4:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP2]], i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKB-NEXT: ret i32 [[TMP4]] +// +uint32_t rol(uint32_t rs1, uint32_t rs2) +{ + return __riscv_rol_32(rs1, rs2); +} + +// RV32ZBKB-LABEL: @ror( +// RV32ZBKB-NEXT: entry: +// RV32ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV32ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV32ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV32ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKB-NEXT: [[TMP4:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP2]], i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKB-NEXT: ret i32 [[TMP4]] +// +uint32_t ror(uint32_t rs1, uint32_t rs2) +{ + return __riscv_ror_32(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkc.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkc.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkc.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkc.c @@ -2,34 +2,46 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zbkc -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZBKC -#include +#include // RV32ZBKC-LABEL: @clmul_32( // RV32ZBKC-NEXT: entry: +// RV32ZBKC-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKC-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 // RV32ZBKC-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR]], align 4 // RV32ZBKC-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // RV32ZBKC-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// RV32ZBKC-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.clmul.i32(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZBKC-NEXT: ret i32 [[TMP2]] +// RV32ZBKC-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKC-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKC-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKC-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKC-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.clmul.i32(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKC-NEXT: ret i32 [[TMP4]] // uint32_t clmul_32(uint32_t a, uint32_t b) { - return __builtin_riscv_clmul_32(a, b); + return __riscv_clmul_32(a, b); } // RV32ZBKC-LABEL: @clmulh_32( // RV32ZBKC-NEXT: entry: +// RV32ZBKC-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKC-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKC-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 // RV32ZBKC-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR]], align 4 // RV32ZBKC-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // RV32ZBKC-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// RV32ZBKC-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.clmulh.i32(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZBKC-NEXT: ret i32 [[TMP2]] +// RV32ZBKC-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKC-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKC-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKC-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKC-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.clmulh.i32(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKC-NEXT: ret i32 [[TMP4]] // uint32_t clmulh_32(uint32_t a, uint32_t b) { - return __builtin_riscv_clmulh_32(a, b); + return __riscv_clmulh_32(a, b); } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkx.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkx.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkx.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbkx.c @@ -2,36 +2,48 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zbkx -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZBKX -#include +#include // RV32ZBKX-LABEL: @xperm8( // RV32ZBKX-NEXT: entry: +// RV32ZBKX-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKX-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZBKX-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZBKX-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZBKX-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZBKX-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.xperm8.i32(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZBKX-NEXT: ret i32 [[TMP2]] +// RV32ZBKX-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKX-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.xperm8.i32(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKX-NEXT: ret i32 [[TMP4]] // uint32_t xperm8(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_xperm8_32(rs1, rs2); + return __riscv_xperm8_32(rs1, rs2); } // RV32ZBKX-LABEL: @xperm4( // RV32ZBKX-NEXT: entry: +// RV32ZBKX-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZBKX-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZBKX-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZBKX-NEXT: store i32 [[RS2:%.*]], ptr 
[[RS2_ADDR]], align 4 // RV32ZBKX-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZBKX-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZBKX-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.xperm4.i32(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZBKX-NEXT: ret i32 [[TMP2]] +// RV32ZBKX-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZBKX-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZBKX-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.xperm4.i32(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZBKX-NEXT: ret i32 [[TMP4]] // uint32_t xperm4(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_xperm4_32(rs1, rs2); + return __riscv_xperm4_32(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkb.c @@ -2,30 +2,158 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zbkb -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZBKB -#include +#include // RV64ZBKB-LABEL: @brev8_32( // RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV64ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV64ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV64ZBKB-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.brev8.i32(i32 [[TMP0]]) -// RV64ZBKB-NEXT: ret i32 [[TMP1]] +// RV64ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.brev8.i32(i32 [[TMP1]]) +// RV64ZBKB-NEXT: ret i32 [[TMP2]] // uint32_t brev8_32(uint32_t rs1) { - return __builtin_riscv_brev8_32(rs1); + return __riscv_brev8_32(rs1); } // RV64ZBKB-LABEL: @brev8_64( // RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKB-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZBKB-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 -// RV64ZBKB-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.brev8.i64(i64 [[TMP0]]) -// RV64ZBKB-NEXT: ret i64 [[TMP1]] +// RV64ZBKB-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.brev8.i64(i64 [[TMP1]]) +// RV64ZBKB-NEXT: ret i64 [[TMP2]] // uint64_t brev8_64(uint64_t rs1) { - return __builtin_riscv_brev8_64(rs1); + return __riscv_brev8_64(rs1); +} + +// RV64ZBKB-LABEL: @rev8_32( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +// RV64ZBKB-NEXT: ret i32 [[TMP2]] +// +uint32_t rev8_32(uint32_t rs1) +{ + return __riscv_rev8_32(rs1); +} + +// RV64ZBKB-LABEL: @rev8_64( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = 
alloca i64, align 8 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBKB-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]]) +// RV64ZBKB-NEXT: ret i64 [[TMP2]] +// +uint64_t rev8_64(uint64_t rs1) +{ + return __riscv_rev8_64(rs1); +} + +// RV64ZBKB-LABEL: @rol_32( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP4:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP2]], i32 [[TMP2]], i32 [[TMP3]]) +// RV64ZBKB-NEXT: ret i32 [[TMP4]] +// +uint32_t rol_32(uint32_t rs1, uint32_t rs2) +{ + return __riscv_rol_32(rs1, rs2); +} + +// RV64ZBKB-LABEL: @rol_64( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[CONV_I:%.*]] = zext i32 [[TMP3]] to i64 +// RV64ZBKB-NEXT: [[TMP4:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP2]], i64 [[TMP2]], i64 [[CONV_I]]) +// RV64ZBKB-NEXT: ret i64 [[TMP4]] +// +uint64_t rol_64(uint64_t rs1, uint32_t rs2) +{ + return __riscv_rol_64(rs1, rs2); +} + +// RV64ZBKB-LABEL: @ror_32( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP4:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP2]], i32 [[TMP2]], i32 [[TMP3]]) +// RV64ZBKB-NEXT: ret i32 [[TMP4]] +// +uint32_t ror_32(uint32_t rs1, uint32_t rs2) +{ + return __riscv_ror_32(rs1, rs2); +} + +// RV64ZBKB-LABEL: @ror_64( +// RV64ZBKB-NEXT: entry: +// RV64ZBKB-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKB-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBKB-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBKB-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 +// RV64ZBKB-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 +// RV64ZBKB-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKB-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKB-NEXT: [[CONV_I:%.*]] = zext i32 [[TMP3]] to i64 +// RV64ZBKB-NEXT: [[TMP4:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 [[TMP2]], i64 [[CONV_I]]) +// RV64ZBKB-NEXT: ret i64 [[TMP4]] +// +uint64_t ror_64(uint64_t rs1, uint32_t rs2) +{ + return __riscv_ror_64(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkc.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkc.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkc.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkc.c @@ -2,49 +2,67 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zbkc -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZBKC -#include +#include // RV64ZBKC-LABEL: @clmul_64( // RV64ZBKC-NEXT: entry: +// RV64ZBKC-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKC-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8 // RV64ZBKC-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR]], align 8 // RV64ZBKC-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8 // RV64ZBKC-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_ADDR]], align 8 -// RV64ZBKC-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.clmul.i64(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZBKC-NEXT: ret i64 [[TMP2]] +// RV64ZBKC-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKC-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.clmul.i64(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZBKC-NEXT: ret i64 [[TMP4]] // uint64_t clmul_64(uint64_t a, uint64_t b) { - return __builtin_riscv_clmul_64(a, b); + return __riscv_clmul_64(a, b); } // RV64ZBKC-LABEL: @clmulh_64( // RV64ZBKC-NEXT: entry: +// RV64ZBKC-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKC-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKC-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8 // RV64ZBKC-NEXT: store i64 [[B:%.*]], ptr [[B_ADDR]], align 8 // RV64ZBKC-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8 // RV64ZBKC-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_ADDR]], 
align 8 -// RV64ZBKC-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.clmulh.i64(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZBKC-NEXT: ret i64 [[TMP2]] +// RV64ZBKC-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKC-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKC-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.clmulh.i64(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZBKC-NEXT: ret i64 [[TMP4]] // uint64_t clmulh_64(uint64_t a, uint64_t b) { - return __builtin_riscv_clmulh_64(a, b); + return __riscv_clmulh_64(a, b); } // RV64ZBKC-LABEL: @clmul_32( // RV64ZBKC-NEXT: entry: +// RV64ZBKC-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV64ZBKC-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV64ZBKC-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // RV64ZBKC-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // RV64ZBKC-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 // RV64ZBKC-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR]], align 4 // RV64ZBKC-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // RV64ZBKC-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// RV64ZBKC-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.clmul.i32(i32 [[TMP0]], i32 [[TMP1]]) -// RV64ZBKC-NEXT: ret i32 [[TMP2]] +// RV64ZBKC-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZBKC-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKC-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZBKC-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV64ZBKC-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.clmul.i32(i32 [[TMP2]], i32 [[TMP3]]) +// RV64ZBKC-NEXT: ret i32 [[TMP4]] // uint32_t clmul_32(uint32_t a, uint32_t b) { - return __builtin_riscv_clmul_32(a, b); + return __riscv_clmul_32(a, b); } diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkx.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkx.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkx.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbkx.c @@ -2,36 +2,48 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zbkx -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZBKX -#include +#include // RV64ZBKX-LABEL: @xperm8( // RV64ZBKX-NEXT: entry: +// RV64ZBKX-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKX-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZBKX-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZBKX-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZBKX-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZBKX-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm8.i64(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZBKX-NEXT: ret i64 [[TMP2]] +// RV64ZBKX-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKX-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.xperm8.i64(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZBKX-NEXT: ret i64 [[TMP4]] // uint64_t xperm8(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_xperm8_64(rs1, rs2); + return __riscv_xperm8_64(rs1, rs2); } // RV64ZBKX-LABEL: @xperm4( // 
RV64ZBKX-NEXT: entry: +// RV64ZBKX-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZBKX-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZBKX-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZBKX-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZBKX-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZBKX-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZBKX-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm4.i64(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZBKX-NEXT: ret i64 [[TMP2]] +// RV64ZBKX-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZBKX-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZBKX-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.xperm4.i64(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZBKX-NEXT: ret i64 [[TMP4]] // uint64_t xperm4(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_xperm4_64(rs1, rs2); + return __riscv_xperm4_64(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknd.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknd.c --- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknd.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknd.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zknd -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZKND -#include +#include // RV32ZKND-LABEL: @aes32dsi( // RV32ZKND-NEXT: entry: @@ -16,7 +16,7 @@ // RV32ZKND-NEXT: ret i32 [[TMP2]] // uint32_t aes32dsi(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_aes32dsi(rs1, rs2, 3); + return __riscv_aes32dsi(rs1, rs2, 3); } // RV32ZKND-LABEL: @aes32dsmi( @@ -31,5 +31,5 @@ // RV32ZKND-NEXT: ret i32 [[TMP2]] // uint32_t aes32dsmi(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_aes32dsmi(rs1, rs2, 3); + return __riscv_aes32dsmi(rs1, rs2, 3); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zkne.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zkne.c --- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zkne.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zkne.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zkne -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZKNE -#include +#include // RV32ZKNE-LABEL: @aes32esi( // RV32ZKNE-NEXT: entry: @@ -16,7 +16,7 @@ // RV32ZKNE-NEXT: ret i32 [[TMP2]] // uint32_t aes32esi(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_aes32esi(rs1, rs2, 3); + return __riscv_aes32esi(rs1, rs2, 3); } // RV32ZKNE-LABEL: @aes32esmi( @@ -31,5 +31,5 @@ // RV32ZKNE-NEXT: ret i32 [[TMP2]] // uint32_t aes32esmi(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_aes32esmi(rs1, rs2, 3); + return __riscv_aes32esmi(rs1, rs2, 3); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknh.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknh.c --- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknh.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv32-zknh.c @@ -2,142 +2,190 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zknh -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV32ZKNH -#include +#include // RV32ZKNH-LABEL: @sha256sig0( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 
// RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV32ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sig0(i32 [[TMP0]])
-// RV32ZKNH-NEXT: ret i32 [[TMP1]]
+// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sig0(i32 [[TMP1]])
+// RV32ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sig0(uint32_t rs1) {
- return __builtin_riscv_sha256sig0(rs1);
+ return __riscv_sha256sig0(rs1);
}
// RV32ZKNH-LABEL: @sha256sig1(
// RV32ZKNH-NEXT: entry:
+// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV32ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sig1(i32 [[TMP0]])
-// RV32ZKNH-NEXT: ret i32 [[TMP1]]
+// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sig1(i32 [[TMP1]])
+// RV32ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sig1(uint32_t rs1) {
- return __builtin_riscv_sha256sig1(rs1);
+ return __riscv_sha256sig1(rs1);
}
// RV32ZKNH-LABEL: @sha256sum0(
// RV32ZKNH-NEXT: entry:
+// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV32ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sum0(i32 [[TMP0]])
-// RV32ZKNH-NEXT: ret i32 [[TMP1]]
+// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sum0(i32 [[TMP1]])
+// RV32ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sum0(uint32_t rs1) {
- return __builtin_riscv_sha256sum0(rs1);
+ return __riscv_sha256sum0(rs1);
}
// RV32ZKNH-LABEL: @sha256sum1(
// RV32ZKNH-NEXT: entry:
+// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV32ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sum1(i32 [[TMP0]])
-// RV32ZKNH-NEXT: ret i32 [[TMP1]]
+// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sum1(i32 [[TMP1]])
+// RV32ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sum1(uint32_t rs1) {
- return __builtin_riscv_sha256sum1(rs1);
+ return __riscv_sha256sum1(rs1);
}
// RV32ZKNH-LABEL: @sha512sig0h(
// RV32ZKNH-NEXT: entry:
+// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
+// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4
// RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
// RV32ZKNH-NEXT: [[TMP1:%.*]] = load
i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sig0h(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sig0h(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sig0h(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sig0h(rs1, rs2); + return __riscv_sha512sig0h(rs1, rs2); } // RV32ZKNH-LABEL: @sha512sig0l( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sig0l(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sig0l(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sig0l(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sig0l(rs1, rs2); + return __riscv_sha512sig0l(rs1, rs2); } // RV32ZKNH-LABEL: @sha512sig1h( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sig1h(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sig1h(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sig1h(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sig1h(rs1, rs2); + return __riscv_sha512sig1h(rs1, rs2); } // RV32ZKNH-LABEL: @sha512sig1l( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr 
[[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sig1l(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sig1l(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sig1l(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sig1l(rs1, rs2); + return __riscv_sha512sig1l(rs1, rs2); } // RV32ZKNH-LABEL: @sha512sum0r( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sum0r(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sum0r(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sum0r(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sum0r(rs1, rs2); + return __riscv_sha512sum0r(rs1, rs2); } // RV32ZKNH-LABEL: @sha512sum1r( // RV32ZKNH-NEXT: entry: +// RV32ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 +// RV32ZKNH-NEXT: [[__Y_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 // RV32ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: store i32 [[RS2:%.*]], ptr [[RS2_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 // RV32ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[RS2_ADDR]], align 4 -// RV32ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha512sum1r(i32 [[TMP0]], i32 [[TMP1]]) -// RV32ZKNH-NEXT: ret i32 [[TMP2]] +// RV32ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: store i32 [[TMP1]], ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP3:%.*]] = load i32, ptr [[__Y_ADDR_I]], align 4 +// RV32ZKNH-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.sha512sum1r(i32 [[TMP2]], i32 [[TMP3]]) +// RV32ZKNH-NEXT: ret i32 [[TMP4]] // uint32_t sha512sum1r(uint32_t rs1, uint32_t rs2) { - return __builtin_riscv_sha512sum1r(rs1, rs2); + return __riscv_sha512sum1r(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd-zkne.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd-zkne.c --- 
a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd-zkne.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd-zkne.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zkne -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZKND-ZKNE -#include +#include // RV64ZKND-ZKNE-LABEL: @aes64ks1i( // RV64ZKND-ZKNE-NEXT: entry: @@ -15,20 +15,26 @@ // RV64ZKND-ZKNE-NEXT: ret i64 [[TMP1]] // uint64_t aes64ks1i(uint64_t rs1) { - return __builtin_riscv_aes64ks1i(rs1, 0); + return __riscv_aes64ks1i(rs1, 0); } // RV64ZKND-ZKNE-LABEL: @aes64ks2( // RV64ZKND-ZKNE-NEXT: entry: +// RV64ZKND-ZKNE-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZKND-ZKNE-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZKND-ZKNE-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-ZKNE-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-ZKNE-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZKND-ZKNE-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZKND-ZKNE-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZKND-ZKNE-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZKND-ZKNE-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64ks2(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZKND-ZKNE-NEXT: ret i64 [[TMP2]] +// RV64ZKND-ZKNE-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-ZKNE-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-ZKNE-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-ZKNE-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-ZKNE-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.aes64ks2(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZKND-ZKNE-NEXT: ret i64 [[TMP4]] // uint64_t aes64ks2(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_aes64ks2(rs1, rs2); + return __riscv_aes64ks2(rs1, rs2); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd.c --- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknd.c @@ -2,48 +2,63 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zknd -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZKND -#include +#include // RV64ZKND-LABEL: @aes64dsm( // RV64ZKND-NEXT: entry: +// RV64ZKND-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZKND-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZKND-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZKND-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZKND-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZKND-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64dsm(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZKND-NEXT: ret i64 [[TMP2]] +// RV64ZKND-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.aes64dsm(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZKND-NEXT: ret i64 [[TMP4]] // uint64_t aes64dsm(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_aes64dsm(rs1, rs2); + return __riscv_aes64dsm(rs1, rs2); } // RV64ZKND-LABEL: @aes64ds( // RV64ZKND-NEXT: entry: 
+// RV64ZKND-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZKND-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZKND-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZKND-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZKND-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZKND-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64ds(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZKND-NEXT: ret i64 [[TMP2]] +// RV64ZKND-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.aes64ds(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZKND-NEXT: ret i64 [[TMP4]] // uint64_t aes64ds(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_aes64ds(rs1, rs2); + return __riscv_aes64ds(rs1, rs2); } // RV64ZKND-LABEL: @aes64im( // RV64ZKND-NEXT: entry: +// RV64ZKND-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZKND-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZKND-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 -// RV64ZKND-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.aes64im(i64 [[TMP0]]) -// RV64ZKND-NEXT: ret i64 [[TMP1]] +// RV64ZKND-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZKND-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64im(i64 [[TMP1]]) +// RV64ZKND-NEXT: ret i64 [[TMP2]] // uint64_t aes64im(uint64_t rs1) { - return __builtin_riscv_aes64im(rs1); + return __riscv_aes64im(rs1); } diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zkne.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zkne.c --- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zkne.c +++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zkne.c @@ -2,35 +2,47 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +zkne -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=RV64ZKNE -#include +#include // RV64ZKNE-LABEL: @aes64es( // RV64ZKNE-NEXT: entry: +// RV64ZKNE-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8 +// RV64ZKNE-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8 // RV64ZKNE-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 // RV64ZKNE-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 // RV64ZKNE-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8 // RV64ZKNE-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8 // RV64ZKNE-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8 // RV64ZKNE-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8 -// RV64ZKNE-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64es(i64 [[TMP0]], i64 [[TMP1]]) -// RV64ZKNE-NEXT: ret i64 [[TMP2]] +// RV64ZKNE-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8 +// RV64ZKNE-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8 +// RV64ZKNE-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8 +// RV64ZKNE-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8 +// RV64ZKNE-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.aes64es(i64 [[TMP2]], i64 [[TMP3]]) +// RV64ZKNE-NEXT: ret i64 [[TMP4]] // uint64_t aes64es(uint64_t rs1, uint64_t rs2) { - return __builtin_riscv_aes64es(rs1, rs2); + return 
__riscv_aes64es(rs1, rs2);
}
// RV64ZKNE-LABEL: @aes64esm(
// RV64ZKNE-NEXT: entry:
+// RV64ZKNE-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8
+// RV64ZKNE-NEXT: [[__Y_ADDR_I:%.*]] = alloca i64, align 8
// RV64ZKNE-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNE-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNE-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8
// RV64ZKNE-NEXT: store i64 [[RS2:%.*]], ptr [[RS2_ADDR]], align 8
// RV64ZKNE-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8
// RV64ZKNE-NEXT: [[TMP1:%.*]] = load i64, ptr [[RS2_ADDR]], align 8
-// RV64ZKNE-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.aes64esm(i64 [[TMP0]], i64 [[TMP1]])
-// RV64ZKNE-NEXT: ret i64 [[TMP2]]
+// RV64ZKNE-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8
+// RV64ZKNE-NEXT: store i64 [[TMP1]], ptr [[__Y_ADDR_I]], align 8
+// RV64ZKNE-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8
+// RV64ZKNE-NEXT: [[TMP3:%.*]] = load i64, ptr [[__Y_ADDR_I]], align 8
+// RV64ZKNE-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.aes64esm(i64 [[TMP2]], i64 [[TMP3]])
+// RV64ZKNE-NEXT: ret i64 [[TMP4]]
//
uint64_t aes64esm(uint64_t rs1, uint64_t rs2) {
- return __builtin_riscv_aes64esm(rs1, rs2);
+ return __riscv_aes64esm(rs1, rs2);
}
diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknh.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknh.c
--- a/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknh.c
+++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/riscv64-zknh.c
@@ -2,105 +2,129 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +zknh -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64ZKNH
-#include
+#include
// RV64ZKNH-LABEL: @sha512sig0(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.sha512sig0(i64 [[TMP0]])
-// RV64ZKNH-NEXT: ret i64 [[TMP1]]
+// RV64ZKNH-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.sha512sig0(i64 [[TMP1]])
+// RV64ZKNH-NEXT: ret i64 [[TMP2]]
//
uint64_t sha512sig0(uint64_t rs1) {
- return __builtin_riscv_sha512sig0(rs1);
+ return __riscv_sha512sig0(rs1);
}
// RV64ZKNH-LABEL: @sha512sig1(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.sha512sig1(i64 [[TMP0]])
-// RV64ZKNH-NEXT: ret i64 [[TMP1]]
+// RV64ZKNH-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.sha512sig1(i64 [[TMP1]])
+// RV64ZKNH-NEXT: ret i64 [[TMP2]]
//
uint64_t sha512sig1(uint64_t rs1) {
- return __builtin_riscv_sha512sig1(rs1);
+ return __riscv_sha512sig1(rs1);
}
// RV64ZKNH-LABEL: @sha512sum0(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.sha512sum0(i64 [[TMP0]])
-// RV64ZKNH-NEXT: ret i64 [[TMP1]]
+// RV64ZKNH-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.sha512sum0(i64 [[TMP1]])
+// RV64ZKNH-NEXT: ret i64 [[TMP2]]
//
uint64_t sha512sum0(uint64_t rs1) {
- return __builtin_riscv_sha512sum0(rs1);
+ return __riscv_sha512sum0(rs1);
}
// RV64ZKNH-LABEL: @sha512sum1(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8
// RV64ZKNH-NEXT: store i64 [[RS1:%.*]], ptr [[RS1_ADDR]], align 8
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i64, ptr [[RS1_ADDR]], align 8
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.sha512sum1(i64 [[TMP0]])
-// RV64ZKNH-NEXT: ret i64 [[TMP1]]
+// RV64ZKNH-NEXT: store i64 [[TMP0]], ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR_I]], align 8
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.sha512sum1(i64 [[TMP1]])
+// RV64ZKNH-NEXT: ret i64 [[TMP2]]
//
uint64_t sha512sum1(uint64_t rs1) {
- return __builtin_riscv_sha512sum1(rs1);
+ return __riscv_sha512sum1(rs1);
}
// RV64ZKNH-LABEL: @sha256sig0(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sig0(i32 [[TMP0]])
-// RV64ZKNH-NEXT: ret i32 [[TMP1]]
+// RV64ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sig0(i32 [[TMP1]])
+// RV64ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sig0(uint32_t rs1) {
- return __builtin_riscv_sha256sig0(rs1);
+ return __riscv_sha256sig0(rs1);
}
// RV64ZKNH-LABEL: @sha256sig1(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sig1(i32 [[TMP0]])
-// RV64ZKNH-NEXT: ret i32 [[TMP1]]
+// RV64ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sig1(i32 [[TMP1]])
+// RV64ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sig1(uint32_t rs1) {
- return __builtin_riscv_sha256sig1(rs1);
+ return __riscv_sha256sig1(rs1);
}
// RV64ZKNH-LABEL: @sha256sum0(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sum0(i32 [[TMP0]])
-// RV64ZKNH-NEXT: ret i32 [[TMP1]]
+// RV64ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sum0(i32 [[TMP1]])
+// RV64ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sum0(uint32_t rs1) {
- return __builtin_riscv_sha256sum0(rs1);
+ return __riscv_sha256sum0(rs1);
}
// RV64ZKNH-LABEL: @sha256sum1(
// RV64ZKNH-NEXT: entry:
+// RV64ZKNH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV64ZKNH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV64ZKNH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV64ZKNH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sha256sum1(i32 [[TMP0]])
-// RV64ZKNH-NEXT: ret i32 [[TMP1]]
+// RV64ZKNH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV64ZKNH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sha256sum1(i32 [[TMP1]])
+// RV64ZKNH-NEXT: ret i32 [[TMP2]]
//
uint32_t sha256sum1(uint32_t rs1) {
- return __builtin_riscv_sha256sum1(rs1);
+ return __riscv_sha256sum1(rs1);
}
diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/zksed.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/zksed.c
--- a/clang/test/CodeGen/RISCV/rvk-intrinsics/zksed.c
+++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/zksed.c
@@ -4,7 +4,7 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +zksed -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64ZKSED
-#include
+#include
// RV32ZKSED-LABEL: @sm4ks(
// RV32ZKSED-NEXT: entry:
@@ -29,7 +29,7 @@
// RV64ZKSED-NEXT: ret i32 [[TMP2]]
//
uint32_t sm4ks(uint32_t rs1, uint32_t rs2) {
- return __builtin_riscv_sm4ks(rs1, rs2, 0);
+ return __riscv_sm4ks(rs1, rs2, 0);
}
// RV32ZKSED-LABEL: @sm4ed(
@@ -55,5 +55,5 @@
// RV64ZKSED-NEXT: ret i32 [[TMP2]]
//
uint32_t sm4ed(uint32_t rs1, uint32_t rs2) {
- return __builtin_riscv_sm4ed(rs1, rs2, 0);
+ return __riscv_sm4ed(rs1, rs2, 0);
}
diff --git a/clang/test/CodeGen/RISCV/rvk-intrinsics/zksh.c b/clang/test/CodeGen/RISCV/rvk-intrinsics/zksh.c
--- a/clang/test/CodeGen/RISCV/rvk-intrinsics/zksh.c
+++ b/clang/test/CodeGen/RISCV/rvk-intrinsics/zksh.c
@@ -4,45 +4,57 @@
// RUN: %clang_cc1 -triple riscv64 -target-feature +zksh -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix=RV64ZKSH
-#include
+#include
// RV32ZKSH-LABEL: @sm3p0(
// RV32ZKSH-NEXT: entry:
+// RV32ZKSH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV32ZKSH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV32ZKSH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV32ZKSH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV32ZKSH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sm3p0(i32 [[TMP0]])
-// RV32ZKSH-NEXT: ret i32 [[TMP1]]
+// RV32ZKSH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV32ZKSH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV32ZKSH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sm3p0(i32 [[TMP1]])
+// RV32ZKSH-NEXT: ret i32 [[TMP2]]
//
// RV64ZKSH-LABEL: @sm3p0(
// RV64ZKSH-NEXT: entry:
+// RV64ZKSH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4
// RV64ZKSH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4
// RV64ZKSH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4
// RV64ZKSH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4
-// RV64ZKSH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sm3p0(i32 [[TMP0]])
-// RV64ZKSH-NEXT: ret i32 [[TMP1]]
+// RV64ZKSH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4
+// RV64ZKSH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4
+// RV64ZKSH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sm3p0(i32 [[TMP1]])
+// RV64ZKSH-NEXT: ret i32 [[TMP2]]
//
uint32_t sm3p0(uint32_t rs1) {
- return
__builtin_riscv_sm3p0(rs1); + return __riscv_sm3p0(rs1); } // RV32ZKSH-LABEL: @sm3p1( // RV32ZKSH-NEXT: entry: +// RV32ZKSH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV32ZKSH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV32ZKSH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV32ZKSH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV32ZKSH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sm3p1(i32 [[TMP0]]) -// RV32ZKSH-NEXT: ret i32 [[TMP1]] +// RV32ZKSH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV32ZKSH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV32ZKSH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sm3p1(i32 [[TMP1]]) +// RV32ZKSH-NEXT: ret i32 [[TMP2]] // // RV64ZKSH-LABEL: @sm3p1( // RV64ZKSH-NEXT: entry: +// RV64ZKSH-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4 // RV64ZKSH-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 // RV64ZKSH-NEXT: store i32 [[RS1:%.*]], ptr [[RS1_ADDR]], align 4 // RV64ZKSH-NEXT: [[TMP0:%.*]] = load i32, ptr [[RS1_ADDR]], align 4 -// RV64ZKSH-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.sm3p1(i32 [[TMP0]]) -// RV64ZKSH-NEXT: ret i32 [[TMP1]] +// RV64ZKSH-NEXT: store i32 [[TMP0]], ptr [[__X_ADDR_I]], align 4 +// RV64ZKSH-NEXT: [[TMP1:%.*]] = load i32, ptr [[__X_ADDR_I]], align 4 +// RV64ZKSH-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.sm3p1(i32 [[TMP1]]) +// RV64ZKSH-NEXT: ret i32 [[TMP2]] // uint32_t sm3p1(uint32_t rs1) { - return __builtin_riscv_sm3p1(rs1); + return __riscv_sm3p1(rs1); }
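
Note: the new clang/lib/Headers/riscv_crypto.h itself is not shown in this diff, only its registration in CMakeLists.txt and the updated tests. As a rough, hypothetical sketch (not the actual header contents; guard macros and attribute spellings are assumptions), the __riscv_* intrinsics exercised above are presumably thin always_inline wrappers around the existing __builtin_riscv_* builtins, which is why the regenerated CHECK lines gain the extra [[__X_ADDR_I]]/[[__Y_ADDR_I]] allocas from the inlined wrapper arguments at -O0:

/* Hypothetical sketch only -- illustrates the wrapper shape implied by the
   updated CHECK lines; not taken verbatim from this patch. */
#include <stdint.h>

#if defined(__riscv_zknh)
/* Wraps the existing builtin; inlining at -O0 produces the __x.addr.i alloca
   plus a call to @llvm.riscv.sha256sig0 seen in the tests. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_sha256sig0(uint32_t __x) {
  return __builtin_riscv_sha256sig0(__x);
}
#endif

#if defined(__riscv_zbkc)
/* Two-operand wrappers follow the same pattern, yielding the paired
   __x.addr.i/__y.addr.i allocas before the @llvm.riscv.clmul call. */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__riscv_clmul_32(uint32_t __x, uint32_t __y) {
  return __builtin_riscv_clmul_32(__x, __y);
}
#endif

A wrapper of this shape folds down to the corresponding @llvm.riscv.* (or @llvm.bswap/@llvm.fshl/@llvm.fshr) intrinsic call, matching the FileCheck expectations updated in this diff.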