diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -26,6 +26,20 @@ TARGET_BUILTIN(__builtin_riscv_clmulh, "LiLiLi", "nc", "experimental-zbc") TARGET_BUILTIN(__builtin_riscv_clmulr, "LiLiLi", "nc", "experimental-zbc") +// Zbp extension +TARGET_BUILTIN(__builtin_riscv_grev_32, "ZiZiZi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_grev_64, "WiWiWi", "nc", "experimental-zbp,64bit") +TARGET_BUILTIN(__builtin_riscv_gorc_32, "ZiZiZi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_gorc_64, "WiWiWi", "nc", "experimental-zbp,64bit") +TARGET_BUILTIN(__builtin_riscv_shfl_32, "ZiZiZi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_shfl_64, "WiWiWi", "nc", "experimental-zbp,64bit") +TARGET_BUILTIN(__builtin_riscv_unshfl_32, "ZiZiZi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_unshfl_64, "WiWiWi", "nc", "experimental-zbp,64bit") +TARGET_BUILTIN(__builtin_riscv_xperm_n, "LiLiLi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_xperm_b, "LiLiLi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_xperm_h, "LiLiLi", "nc", "experimental-zbp") +TARGET_BUILTIN(__builtin_riscv_xperm_w, "WiWiWi", "nc", "experimental-zbp,64bit") + // Zbr extension TARGET_BUILTIN(__builtin_riscv_crc32_b, "LiLi", "nc", "experimental-zbr") TARGET_BUILTIN(__builtin_riscv_crc32_h, "LiLi", "nc", "experimental-zbr") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17849,6 +17849,18 @@ case RISCV::BI__builtin_riscv_clmul: case RISCV::BI__builtin_riscv_clmulh: case RISCV::BI__builtin_riscv_clmulr: + case RISCV::BI__builtin_riscv_grev_32: + case RISCV::BI__builtin_riscv_grev_64: + case RISCV::BI__builtin_riscv_gorc_32: + case RISCV::BI__builtin_riscv_gorc_64: + case 
RISCV::BI__builtin_riscv_shfl_32: + case RISCV::BI__builtin_riscv_shfl_64: + case RISCV::BI__builtin_riscv_unshfl_32: + case RISCV::BI__builtin_riscv_unshfl_64: + case RISCV::BI__builtin_riscv_xperm_n: + case RISCV::BI__builtin_riscv_xperm_b: + case RISCV::BI__builtin_riscv_xperm_h: + case RISCV::BI__builtin_riscv_xperm_w: case RISCV::BI__builtin_riscv_crc32_b: case RISCV::BI__builtin_riscv_crc32_h: case RISCV::BI__builtin_riscv_crc32_w: @@ -17876,6 +17888,36 @@ ID = Intrinsic::riscv_clmulr; break; + // Zbp + case RISCV::BI__builtin_riscv_grev_32: + case RISCV::BI__builtin_riscv_grev_64: + ID = Intrinsic::riscv_grev; + break; + case RISCV::BI__builtin_riscv_gorc_32: + case RISCV::BI__builtin_riscv_gorc_64: + ID = Intrinsic::riscv_gorc; + break; + case RISCV::BI__builtin_riscv_shfl_32: + case RISCV::BI__builtin_riscv_shfl_64: + ID = Intrinsic::riscv_shfl; + break; + case RISCV::BI__builtin_riscv_unshfl_32: + case RISCV::BI__builtin_riscv_unshfl_64: + ID = Intrinsic::riscv_unshfl; + break; + case RISCV::BI__builtin_riscv_xperm_n: + ID = Intrinsic::riscv_xperm_n; + break; + case RISCV::BI__builtin_riscv_xperm_b: + ID = Intrinsic::riscv_xperm_b; + break; + case RISCV::BI__builtin_riscv_xperm_h: + ID = Intrinsic::riscv_xperm_h; + break; + case RISCV::BI__builtin_riscv_xperm_w: + ID = Intrinsic::riscv_xperm_w; + break; + // Zbr case RISCV::BI__builtin_riscv_crc32_b: ID = Intrinsic::riscv_crc32_b; diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbp.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbp.c @@ -0,0 +1,179 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zbp -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV32ZBP + +// RV32ZBP-LABEL: @grev( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: 
[[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.grev.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long grev(long rs1, long rs2) +{ + return __builtin_riscv_grev_32(rs1, rs2); +} + +// RV32ZBP-LABEL: @grevi( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.grev.i32(i32 [[TMP0]], i32 13) +// RV32ZBP-NEXT: ret i32 [[TMP1]] +// +long grevi(long rs1) +{ + const int i = 13; + return __builtin_riscv_grev_32(rs1, i); +} + +// RV32ZBP-LABEL: @gorc( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.gorc.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long gorc(long rs1, long rs2) +{ + return __builtin_riscv_gorc_32(rs1, rs2); +} + +// RV32ZBP-LABEL: @gorci( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store 
i32 13, i32* [[I]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.gorc.i32(i32 [[TMP0]], i32 13) +// RV32ZBP-NEXT: ret i32 [[TMP1]] +// +long gorci(long rs1) +{ + const int i = 13; + return __builtin_riscv_gorc_32(rs1, i); +} + +// RV32ZBP-LABEL: @shfl( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.shfl.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long shfl(long rs1, long rs2) +{ + return __builtin_riscv_shfl_32(rs1, rs2); +} + +// RV32ZBP-LABEL: @shfli( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.shfl.i32(i32 [[TMP0]], i32 13) +// RV32ZBP-NEXT: ret i32 [[TMP1]] +// +long shfli(long rs1) +{ + const int i = 13; + return __builtin_riscv_shfl_32(rs1, i); +} + +// RV32ZBP-LABEL: @unshfl( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: 
[[TMP2:%.*]] = call i32 @llvm.riscv.unshfl.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long unshfl(long rs1, long rs2) +{ + return __builtin_riscv_unshfl_32(rs1, rs2); +} + +// RV32ZBP-LABEL: @unshfli( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.unshfl.i32(i32 [[TMP0]], i32 13) +// RV32ZBP-NEXT: ret i32 [[TMP1]] +// +long unshfli(long rs1) +{ + const int i = 13; + return __builtin_riscv_unshfl_32(rs1, i); +} + +// RV32ZBP-LABEL: @xperm_n( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.xperm.n.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long xperm_n(long rs1, long rs2) +{ + return __builtin_riscv_xperm_n(rs1, rs2); +} + +// RV32ZBP-LABEL: @xperm_b( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.xperm.b.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret 
i32 [[TMP2]] +// +long xperm_b(long rs1, long rs2) +{ + return __builtin_riscv_xperm_b(rs1, rs2); +} + +// RV32ZBP-LABEL: @xperm_h( +// RV32ZBP-NEXT: entry: +// RV32ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.xperm.h.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBP-NEXT: ret i32 [[TMP2]] +// +long xperm_h(long rs1, long rs2) +{ + return __builtin_riscv_xperm_h(rs1, rs2); +} diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbp.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbp.c @@ -0,0 +1,323 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zbp -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV64ZBP + +// RV64ZBP-LABEL: @grev( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.grev.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long grev(long rs1, long rs2) +{ + return __builtin_riscv_grev_64(rs1, rs2); +} + +// RV64ZBP-LABEL: @grevi( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.grev.i64(i64 [[TMP0]], i64 13) +// RV64ZBP-NEXT: ret i64 [[TMP1]] +// +long grevi(long rs1) +{ + const int i = 13; + return __builtin_riscv_grev_64(rs1, i); +} + +// RV64ZBP-LABEL: @grevw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.grev.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBP-NEXT: ret i32 [[TMP2]] +// +int grevw(int rs1, int rs2) +{ + return __builtin_riscv_grev_32(rs1, rs2); +} + +// RV64ZBP-LABEL: @greviw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.grev.i32(i32 [[TMP0]], i32 13) +// RV64ZBP-NEXT: ret i32 [[TMP1]] +// +int greviw(int rs1) +{ + const int i = 13; + return __builtin_riscv_grev_32(rs1, i); +} + +// RV64ZBP-LABEL: @gorc( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, 
i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.gorc.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long gorc(long rs1, long rs2) +{ + return __builtin_riscv_gorc_64(rs1, rs2); +} + +// RV64ZBP-LABEL: @gorci( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.gorc.i64(i64 [[TMP0]], i64 13) +// RV64ZBP-NEXT: ret i64 [[TMP1]] +// +long gorci(long rs1) +{ + const int i = 13; + return __builtin_riscv_gorc_64(rs1, i); +} + +// RV64ZBP-LABEL: @gorcw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.gorc.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBP-NEXT: ret i32 [[TMP2]] +// +int gorcw(int rs1, int rs2) +{ + return __builtin_riscv_gorc_32(rs1, rs2); +} + +// RV64ZBP-LABEL: @gorciw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.gorc.i32(i32 [[TMP0]], i32 13) +// RV64ZBP-NEXT: ret i32 [[TMP1]] 
+// +int gorciw(int rs1) +{ + const int i = 13; + return __builtin_riscv_gorc_32(rs1, i); +} + +// RV64ZBP-LABEL: @shfl( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.shfl.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long shfl(long rs1, long rs2) +{ + return __builtin_riscv_shfl_64(rs1, rs2); +} + +// RV64ZBP-LABEL: @shfli( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.shfl.i64(i64 [[TMP0]], i64 13) +// RV64ZBP-NEXT: ret i64 [[TMP1]] +// +long shfli(long rs1) +{ + const int i = 13; + return __builtin_riscv_shfl_64(rs1, i); +} + +// RV64ZBP-LABEL: @shflw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.shfl.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBP-NEXT: ret i32 [[TMP2]] +// +int shflw(int rs1, int rs2) +{ + return __builtin_riscv_shfl_32(rs1, rs2); +} + +// RV64ZBP-LABEL: @shfli_NOw( 
+// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.shfl.i32(i32 [[TMP0]], i32 13) +// RV64ZBP-NEXT: ret i32 [[TMP1]] +// +int shfli_NOw(int rs1) +{ + const int i = 13; + return __builtin_riscv_shfl_32(rs1, i); +} + +// RV64ZBP-LABEL: @unshfl( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.unshfl.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long unshfl(long rs1, long rs2) +{ + return __builtin_riscv_unshfl_64(rs1, rs2); +} + +// RV64ZBP-LABEL: @unshfli( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.unshfl.i64(i64 [[TMP0]], i64 13) +// RV64ZBP-NEXT: ret i64 [[TMP1]] +// +long unshfli(long rs1) +{ + const int i = 13; + return __builtin_riscv_unshfl_64(rs1, i); +} + +// RV64ZBP-LABEL: @unshflw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* 
[[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.unshfl.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBP-NEXT: ret i32 [[TMP2]] +// +int unshflw(int rs1, int rs2) +{ + return __builtin_riscv_unshfl_32(rs1, rs2); +} + +// RV64ZBP-LABEL: @unshfli_NOw( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: [[I:%.*]] = alloca i32, align 4 +// RV64ZBP-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: store i32 13, i32* [[I]], align 4 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBP-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.unshfl.i32(i32 [[TMP0]], i32 13) +// RV64ZBP-NEXT: ret i32 [[TMP1]] +// +int unshfli_NOw(int rs1) +{ + const int i = 13; + return __builtin_riscv_unshfl_32(rs1, i); +} + +// RV64ZBP-LABEL: @xperm_n( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm.n.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long xperm_n(long rs1, long rs2) +{ + return __builtin_riscv_xperm_n(rs1, rs2); +} + +// RV64ZBP-LABEL: @xperm_b( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// 
RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm.b.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long xperm_b(long rs1, long rs2) +{ + return __builtin_riscv_xperm_b(rs1, rs2); +} + +// RV64ZBP-LABEL: @xperm_h( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm.h.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long xperm_h(long rs1, long rs2) +{ + return __builtin_riscv_xperm_h(rs1, rs2); +} + +// RV64ZBP-LABEL: @xperm_w( +// RV64ZBP-NEXT: entry: +// RV64ZBP-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBP-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBP-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.xperm.w.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBP-NEXT: ret i64 [[TMP2]] +// +long xperm_w(long rs1, long rs2) +{ + return __builtin_riscv_xperm_w(rs1, rs2); +} diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -89,6 +89,16 @@ def int_riscv_clmulh : BitManipGPRGPRIntrinsics; def int_riscv_clmulr : BitManipGPRGPRIntrinsics; + // Zbp + def 
int_riscv_grev : BitManipGPRGPRIntrinsics; + def int_riscv_gorc : BitManipGPRGPRIntrinsics; + def int_riscv_shfl : BitManipGPRGPRIntrinsics; + def int_riscv_unshfl : BitManipGPRGPRIntrinsics; + def int_riscv_xperm_n : BitManipGPRGPRIntrinsics; + def int_riscv_xperm_b : BitManipGPRGPRIntrinsics; + def int_riscv_xperm_h : BitManipGPRGPRIntrinsics; + def int_riscv_xperm_w : BitManipGPRGPRIntrinsics; + // Zbr def int_riscv_crc32_b : BitManipGPRIntrinsics; def int_riscv_crc32_h : BitManipGPRIntrinsics; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -96,6 +96,9 @@ GORC, GORCW, SHFL, + SHFLW, + UNSHFL, + UNSHFLW, // Vector Extension // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand // for the VL value to be used for the operation. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -269,6 +269,7 @@ if (Subtarget.is64Bit()) { setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); setOperationAction(ISD::BSWAP, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); } } else { // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll @@ -3021,6 +3022,18 @@ // Lower to the GORCI encoding for orc.b. return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1), DAG.getConstant(7, DL, XLenVT)); + case Intrinsic::riscv_grev: + case Intrinsic::riscv_gorc: { + unsigned Opc = + IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC; + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::riscv_shfl: + case Intrinsic::riscv_unshfl: { + unsigned Opc = + IntNo == Intrinsic::riscv_shfl ? 
RISCVISD::SHFL : RISCVISD::UNSHFL; + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); + } case Intrinsic::riscv_vmv_x_s: assert(Op.getValueType() == XLenVT && "Unexpected VT!"); return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), @@ -4528,6 +4541,39 @@ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; } + case Intrinsic::riscv_grev: + case Intrinsic::riscv_gorc: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + unsigned Opc = + IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW; + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + break; + } + case Intrinsic::riscv_shfl: + case Intrinsic::riscv_unshfl: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + unsigned Opc = + IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW; + if (isa<ConstantSDNode>(N->getOperand(2))) { + NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, + DAG.getConstant(0xf, DL, MVT::i64)); + Opc = IntNo == Intrinsic::riscv_shfl ? 
RISCVISD::SHFL : RISCVISD::UNSHFL; + } + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + break; + } case Intrinsic::riscv_vmv_x_s: { EVT VT = N->getValueType(0); MVT XLenVT = Subtarget.getXLenVT(); @@ -5472,11 +5518,14 @@ case RISCVISD::GORCW: case RISCVISD::FSLW: case RISCVISD::FSRW: + case RISCVISD::SHFLW: + case RISCVISD::UNSHFLW: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. return 33; - case RISCVISD::SHFL: { + case RISCVISD::SHFL: + case RISCVISD::UNSHFL: { // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but // will stay within the upper 32 bits. If there were more than 32 sign bits @@ -7218,6 +7267,9 @@ NODE_NAME_CASE(GORC) NODE_NAME_CASE(GORCW) NODE_NAME_CASE(SHFL) + NODE_NAME_CASE(SHFLW) + NODE_NAME_CASE(UNSHFL) + NODE_NAME_CASE(UNSHFLW) NODE_NAME_CASE(VMV_V_X_VL) NODE_NAME_CASE(VFMV_V_F_VL) NODE_NAME_CASE(VMV_X_S) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -30,6 +30,9 @@ def riscv_gorc : SDNode<"RISCVISD::GORC", SDTIntBinOp>; def riscv_gorcw : SDNode<"RISCVISD::GORCW", SDT_RISCVIntBinOpW>; def riscv_shfl : SDNode<"RISCVISD::SHFL", SDTIntBinOp>; +def riscv_shflw : SDNode<"RISCVISD::SHFLW", SDT_RISCVIntBinOpW>; +def riscv_unshfl : SDNode<"RISCVISD::UNSHFL", SDTIntBinOp>; +def riscv_unshflw: SDNode<"RISCVISD::UNSHFLW", SDT_RISCVIntBinOpW>; def UImmLog2XLenHalfAsmOperand : AsmOperandClass { let Name = "UImmLog2XLenHalf"; @@ -627,6 +630,24 @@ (RORIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>; } // Predicates = [HasStdExtZbbOrZbp, IsRV64] +let Predicates = [HasStdExtZbp] in { +def : InstAlias<"grev $rd, $rs1, $shamt", + 
(GREVI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>; +def : InstAlias<"gorc $rd, $rs1, $shamt", + (GORCI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>; +def : InstAlias<"shfl $rd, $rs1, $shamt", + (SHFLI GPR:$rd, GPR:$rs1, shfl_uimm:$shamt), 0>; +def : InstAlias<"unshfl $rd, $rs1, $shamt", + (UNSHFLI GPR:$rd, GPR:$rs1, shfl_uimm:$shamt), 0>; +} // Predicates = [HasStdExtZbp] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : InstAlias<"grevw $rd, $rs1, $shamt", + (GREVIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>; +def : InstAlias<"gorcw $rd, $rs1, $shamt", + (GORCIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>; +} // Predicates = [HasStdExtZbp, IsRV64] + let Predicates = [HasStdExtZbs] in { def : InstAlias<"bset $rd, $rs1, $shamt", (BSETI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>; @@ -706,7 +727,16 @@ } let Predicates = [HasStdExtZbp] in { +def : PatGprGpr<riscv_grev, GREV>; +def : PatGprGpr<riscv_gorc, GORC>; +def : PatGprGpr<riscv_shfl, SHFL>; +def : PatGprGpr<riscv_unshfl, UNSHFL>; +def : PatGprGpr<int_riscv_xperm_n, XPERMN>; +def : PatGprGpr<int_riscv_xperm_b, XPERMB>; +def : PatGprGpr<int_riscv_xperm_h, XPERMH>; +def : PatGprGpr<int_riscv_xperm_w, XPERMW>; def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>; +def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>; def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>; def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>; } // Predicates = [HasStdExtZbp] @@ -863,6 +893,10 @@ let Predicates = [HasStdExtZbp, IsRV64] in { def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>; +def : PatGprGpr<riscv_grevw, GREVW>; +def : PatGprGpr<riscv_gorcw, GORCW>; +def : PatGprGpr<riscv_shflw, SHFLW>; +def : PatGprGpr<riscv_unshflw, UNSHFLW>; def : PatGprImm<riscv_grevw, GREVIW, uimm5>; def : PatGprImm<riscv_gorcw, GORCIW, uimm5>; } // Predicates = [HasStdExtZbp, IsRV64] diff --git a/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll @@ -0,0 +1,181 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBP + 
+declare i32 @llvm.riscv.grev.i32(i32 %a, i32 %b) + +define i32 @grev32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: grev32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: grev a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grev32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: grev a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.grevi.i32(i32 %a) + +define i32 @grevi32(i32 %a) nounwind { +; RV32IB-LABEL: grevi32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: grevi a0, a0, 13 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: grevi32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: grevi a0, a0, 13 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.gorc.i32(i32 %a, i32 %b) + +define i32 @gorc32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: gorc32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: gorc a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorc32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: gorc a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.gorc.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.gorci.i32(i32 %a) + +define i32 @gorci32(i32 %a) nounwind { +; RV32IB-LABEL: gorci32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: gorci a0, a0, 13 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: gorci32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: gorci a0, a0, 13 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.gorc.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.shfl.i32(i32 %a, i32 %b) + +define i32 @shfl32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: shfl32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: shfl a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfl32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: shfl a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.shfli.i32(i32 %a) + +define i32 @shfli32(i32 %a) nounwind { +; RV32IB-LABEL: shfli32: +; RV32IB: # %bb.0: +; 
RV32IB-NEXT: shfli a0, a0, 13 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: shfli32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: shfli a0, a0, 13 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b) + +define i32 @unshfl32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: unshfl32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: unshfl a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: unshfl32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: unshfl a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.unshfli.i32(i32 %a) + +define i32 @unshfli32(i32 %a) nounwind { +; RV32IB-LABEL: unshfli32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: unshfli a0, a0, 13 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: unshfli32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: unshfli a0, a0, 13 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.xperm.n.i32(i32 %a, i32 %b) + +define i32 @xpermn32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: xpermn32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xperm.n a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: xpermn32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xperm.n a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.xperm.n.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.xperm.b.i32(i32 %a, i32 %b) + +define i32 @xpermb32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: xpermb32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xperm.b a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBP-LABEL: xpermb32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xperm.b a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.xperm.b.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.xperm.h.i32(i32 %a, i32 %b) + +define i32 @xpermh32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: xpermh32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xperm.h a0, a0, a1 +; RV32IB-NEXT: ret +; +; 
RV32IBP-LABEL: xpermh32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xperm.h a0, a0, a1 +; RV32IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.xperm.h.i32(i32 %a, i32 %b) + ret i32 %tmp +} diff --git a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll @@ -0,0 +1,325 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBP + +declare i32 @llvm.riscv.grev.i32(i32 %a, i32 %b) + +define signext i32 @grev32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: grev32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: grevw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: grevw a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.grevi.i32(i32 %a) + +define signext i32 @grevi32(i32 signext %a) nounwind { +; RV64IB-LABEL: grevi32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: greviw a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grevi32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: greviw a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.gorc.i32(i32 %a, i32 %b) + +define signext i32 @gorc32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: gorc32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorcw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorcw a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.gorc.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.gorci.i32(i32 %a) + +define signext i32 @gorci32(i32 signext %a) 
nounwind { +; RV64IB-LABEL: gorci32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorciw a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorci32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorciw a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.gorc.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.shfl.i32(i32 %a, i32 %b) + +define signext i32 @shfl32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: shfl32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: shflw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: shflw a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.shfli.i32(i32 %a) + +define signext i32 @shfli32(i32 signext %a) nounwind { +; RV64IB-LABEL: shfli32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: shfli a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfli32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: shfli a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b) + +define signext i32 @unshfl32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: unshfl32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: unshflw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: unshfl32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: unshflw a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.unshfli.i32(i32 %a) + +define signext i32 @unshfli32(i32 signext %a) nounwind { +; RV64IB-LABEL: unshfli32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: unshfli a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: unshfli32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: unshfli a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 13) + ret i32 %tmp +} + +declare i64 @llvm.riscv.grev.i64(i64 %a, i64 %b) + +define i64 @grev64(i64 %a, i64 %b) nounwind { +; 
RV64IB-LABEL: grev64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: grev a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grev64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: grev a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.grev.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.grevi.i64(i64 %a) + +define i64 @grevi64(i64 %a) nounwind { +; RV64IB-LABEL: grevi64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: grevi a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: grevi64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: grevi a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.grev.i64(i64 %a, i64 13) + ret i64 %tmp +} + +declare i64 @llvm.riscv.gorc.i64(i64 %a, i64 %b) + +define i64 @gorc64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: gorc64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorc a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorc64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorc a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.gorc.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.gorci.i64(i64 %a) + +define i64 @gorci64(i64 %a) nounwind { +; RV64IB-LABEL: gorci64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: gorci a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: gorci64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: gorci a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.gorc.i64(i64 %a, i64 13) + ret i64 %tmp +} + +declare i64 @llvm.riscv.shfl.i64(i64 %a, i64 %b) + +define i64 @shfl64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: shfl64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: shfl a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfl64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: shfl a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.shfl.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.shfli.i64(i64 %a) + +define i64 @shfli64(i64 %a) nounwind { +; RV64IB-LABEL: shfli64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: shfli a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: shfli64: +; RV64IBP: # %bb.0: +; 
RV64IBP-NEXT: shfli a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.shfl.i64(i64 %a, i64 13) + ret i64 %tmp +} + +declare i64 @llvm.riscv.unshfl.i64(i64 %a, i64 %b) + +define i64 @unshfl64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: unshfl64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: unshfl a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: unshfl64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: unshfl a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.unshfl.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.unshfli.i64(i64 %a) + +define i64 @unshfli64(i64 %a) nounwind { +; RV64IB-LABEL: unshfli64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: unshfli a0, a0, 13 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: unshfli64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: unshfli a0, a0, 13 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.unshfl.i64(i64 %a, i64 13) + ret i64 %tmp +} + +declare i64 @llvm.riscv.xperm.n.i64(i64 %a, i64 %b) + +define i64 @xpermn64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: xpermn64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xperm.n a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: xpermn64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xperm.n a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.xperm.n.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.xperm.b.i64(i64 %a, i64 %b) + +define i64 @xpermb64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: xpermb64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xperm.b a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: xpermb64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xperm.b a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.xperm.b.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.xperm.h.i64(i64 %a, i64 %b) + +define i64 @xpermh64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: xpermh64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xperm.h a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: xpermh64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xperm.h a0, a0, a1 +; RV64IBP-NEXT: ret + 
%tmp = call i64 @llvm.riscv.xperm.h.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.xperm.w.i64(i64 %a, i64 %b) + +define i64 @xpermw64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: xpermw64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xperm.w a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBP-LABEL: xpermw64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xperm.w a0, a0, a1 +; RV64IBP-NEXT: ret + %tmp = call i64 @llvm.riscv.xperm.w.i64(i64 %a, i64 %b) + ret i64 %tmp +} diff --git a/llvm/test/MC/RISCV/rv32b-aliases-valid.s b/llvm/test/MC/RISCV/rv32b-aliases-valid.s --- a/llvm/test/MC/RISCV/rv32b-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv32b-aliases-valid.s @@ -242,3 +242,19 @@ # CHECK-S-OBJ-NOALIAS: bexti t0, t1, 8 # CHECK-S-OBJ: bexti t0, t1, 8 bext x5, x6, 8 + +# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 13 +# CHECK-S-OBJ: grevi t0, t1, 13 +grev x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: gorci t0, t1, 13 +# CHECK-S-OBJ: gorci t0, t1, 13 +gorc x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: shfli t0, t1, 13 +# CHECK-S-OBJ: shfli t0, t1, 13 +shfl x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: unshfli t0, t1, 13 +# CHECK-S-OBJ: unshfli t0, t1, 13 +unshfl x5, x6, 13 diff --git a/llvm/test/MC/RISCV/rv64b-aliases-valid.s b/llvm/test/MC/RISCV/rv64b-aliases-valid.s --- a/llvm/test/MC/RISCV/rv64b-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv64b-aliases-valid.s @@ -338,3 +338,27 @@ # CHECK-S-OBJ-NOALIAS: bexti t0, t1, 8 # CHECK-S-OBJ: bexti t0, t1, 8 bext x5, x6, 8 + +# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 13 +# CHECK-S-OBJ: grevi t0, t1, 13 +grev x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: gorci t0, t1, 13 +# CHECK-S-OBJ: gorci t0, t1, 13 +gorc x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: shfli t0, t1, 13 +# CHECK-S-OBJ: shfli t0, t1, 13 +shfl x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: unshfli t0, t1, 13 +# CHECK-S-OBJ: unshfli t0, t1, 13 +unshfl x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: greviw t0, t1, 13 +# CHECK-S-OBJ: greviw t0, t1, 13 +grevw x5, x6, 13 + +# CHECK-S-OBJ-NOALIAS: gorciw t0, t1, 13 +# CHECK-S-OBJ: gorciw t0, t1, 13 +gorcw 
x5, x6, 13