diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -26,6 +26,15 @@ TARGET_BUILTIN(__builtin_riscv_clmulh, "LiLiLi", "nc", "experimental-zbc") TARGET_BUILTIN(__builtin_riscv_clmulr, "LiLiLi", "nc", "experimental-zbc") +// Zbe extension +TARGET_BUILTIN(__builtin_riscv_bcompress_32, "ZiZiZi", "nc", "experimental-zbe") +TARGET_BUILTIN(__builtin_riscv_bcompress_64, "WiWiWi", "nc", + "experimental-zbe,64bit") +TARGET_BUILTIN(__builtin_riscv_bdecompress_32, "ZiZiZi", "nc", + "experimental-zbe") +TARGET_BUILTIN(__builtin_riscv_bdecompress_64, "WiWiWi", "nc", + "experimental-zbe,64bit") + // Zbp extension TARGET_BUILTIN(__builtin_riscv_grev_32, "ZiZiZi", "nc", "experimental-zbp") TARGET_BUILTIN(__builtin_riscv_grev_64, "WiWiWi", "nc", "experimental-zbp,64bit") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17849,6 +17849,10 @@ case RISCV::BI__builtin_riscv_clmul: case RISCV::BI__builtin_riscv_clmulh: case RISCV::BI__builtin_riscv_clmulr: + case RISCV::BI__builtin_riscv_bcompress_32: + case RISCV::BI__builtin_riscv_bcompress_64: + case RISCV::BI__builtin_riscv_bdecompress_32: + case RISCV::BI__builtin_riscv_bdecompress_64: case RISCV::BI__builtin_riscv_grev_32: case RISCV::BI__builtin_riscv_grev_64: case RISCV::BI__builtin_riscv_gorc_32: @@ -17888,6 +17892,16 @@ ID = Intrinsic::riscv_clmulr; break; + // Zbe + case RISCV::BI__builtin_riscv_bcompress_32: + case RISCV::BI__builtin_riscv_bcompress_64: + ID = Intrinsic::riscv_bcompress; + break; + case RISCV::BI__builtin_riscv_bdecompress_32: + case RISCV::BI__builtin_riscv_bdecompress_64: + ID = Intrinsic::riscv_bdecompress; + break; + // Zbp case RISCV::BI__builtin_riscv_grev_32: case RISCV::BI__builtin_riscv_grev_64: diff --git 
a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbe.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbe.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbe.c @@ -0,0 +1,35 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zbe -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV32ZBE + +// RV32ZBE-LABEL: @bcompress( +// RV32ZBE-NEXT: entry: +// RV32ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBE-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBE-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.bcompress.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBE-NEXT: ret i32 [[TMP2]] +// +long bcompress(long rs1, long rs2) +{ + return __builtin_riscv_bcompress_32(rs1, rs2); +} + +// RV32ZBE-LABEL: @bdecompress( +// RV32ZBE-NEXT: entry: +// RV32ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV32ZBE-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV32ZBE-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV32ZBE-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.bdecompress.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV32ZBE-NEXT: ret i32 [[TMP2]] +// +long bdecompress(long rs1, long rs2) +{ + return __builtin_riscv_bdecompress_32(rs1, rs2); +} diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbe.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbe.c new file mode 100644 --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbe.c @@ -0,0 +1,67 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zbe -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV64ZBE + +// RV64ZBE-LABEL: @bcompressw( +// RV64ZBE-NEXT: entry: +// RV64ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBE-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBE-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.bcompress.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBE-NEXT: ret i32 [[TMP2]] +// +int bcompressw(int rs1, int rs2) +{ + return __builtin_riscv_bcompress_32(rs1, rs2); +} + +// RV64ZBE-LABEL: @bdecompressw( +// RV64ZBE-NEXT: entry: +// RV64ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBE-NEXT: store i32 [[RS1:%.*]], i32* [[RS1_ADDR]], align 4 +// RV64ZBE-NEXT: store i32 [[RS2:%.*]], i32* [[RS2_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP0:%.*]] = load i32, i32* [[RS1_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP1:%.*]] = load i32, i32* [[RS2_ADDR]], align 4 +// RV64ZBE-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.bdecompress.i32(i32 [[TMP0]], i32 [[TMP1]]) +// RV64ZBE-NEXT: ret i32 [[TMP2]] +// +int bdecompressw(int rs1, int rs2) +{ + return __builtin_riscv_bdecompress_32(rs1, rs2); +} + +// RV64ZBE-LABEL: @bcompress( +// RV64ZBE-NEXT: entry: +// RV64ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBE-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBE-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP0:%.*]] = load i64, i64* 
[[RS1_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.bcompress.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBE-NEXT: ret i64 [[TMP2]] +// +long bcompress(long rs1, long rs2) +{ + return __builtin_riscv_bcompress_64(rs1, rs2); +} + +// RV64ZBE-LABEL: @bdecompress( +// RV64ZBE-NEXT: entry: +// RV64ZBE-NEXT: [[RS1_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBE-NEXT: [[RS2_ADDR:%.*]] = alloca i64, align 8 +// RV64ZBE-NEXT: store i64 [[RS1:%.*]], i64* [[RS1_ADDR]], align 8 +// RV64ZBE-NEXT: store i64 [[RS2:%.*]], i64* [[RS2_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP0:%.*]] = load i64, i64* [[RS1_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP1:%.*]] = load i64, i64* [[RS2_ADDR]], align 8 +// RV64ZBE-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.bdecompress.i64(i64 [[TMP0]], i64 [[TMP1]]) +// RV64ZBE-NEXT: ret i64 [[TMP2]] +// +long bdecompress(long rs1, long rs2) +{ + return __builtin_riscv_bdecompress_64(rs1, rs2); +} diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -89,6 +89,10 @@ def int_riscv_clmulh : BitManipGPRGPRIntrinsics; def int_riscv_clmulr : BitManipGPRGPRIntrinsics; + // Zbe + def int_riscv_bcompress : BitManipGPRGPRIntrinsics; + def int_riscv_bdecompress : BitManipGPRGPRIntrinsics; + // Zbp def int_riscv_grev : BitManipGPRGPRIntrinsics; def int_riscv_gorc : BitManipGPRGPRIntrinsics; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -99,6 +99,14 @@ SHFLW, UNSHFL, UNSHFLW, + // Bit Compress/Decompress implement the generic bit extract and bit deposit + // functions. 
This operation is also referred to as bit gather/scatter, bit + // pack/unpack, parallel extract/deposit, compress/expand, or right + // compress/right expand. + BCOMPRESS, + BCOMPRESSW, + BDECOMPRESS, + BDECOMPRESSW, // Vector Extension // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand // for the VL value to be used for the operation. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -201,6 +201,9 @@ if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); + if (Subtarget.hasStdExtZbe() && Subtarget.is64Bit()) + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); + if (Subtarget.is64Bit()) { setOperationAction(ISD::ADD, MVT::i32, Custom); setOperationAction(ISD::SUB, MVT::i32, Custom); @@ -3097,6 +3100,12 @@ IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL; return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::riscv_bcompress: + case Intrinsic::riscv_bdecompress: { + unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? 
RISCVISD::BCOMPRESS + : RISCVISD::BDECOMPRESS; + return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); + } case Intrinsic::riscv_vmv_x_s: assert(Op.getValueType() == XLenVT && "Unexpected VT!"); return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), @@ -4700,6 +4709,21 @@ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); break; } + case Intrinsic::riscv_bcompress: + case Intrinsic::riscv_bdecompress: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + SDValue NewOp1 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewOp2 = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); + unsigned Opc = IntNo == Intrinsic::riscv_bcompress + ? RISCVISD::BCOMPRESSW + : RISCVISD::BDECOMPRESSW; + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + break; + } case Intrinsic::riscv_vmv_x_s: { EVT VT = N->getValueType(0); MVT XLenVT = Subtarget.getXLenVT(); @@ -5655,6 +5679,8 @@ case RISCVISD::FSRW: case RISCVISD::SHFLW: case RISCVISD::UNSHFLW: + case RISCVISD::BCOMPRESSW: + case RISCVISD::BDECOMPRESSW: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. 
@@ -7405,6 +7431,10 @@ NODE_NAME_CASE(SHFLW) NODE_NAME_CASE(UNSHFL) NODE_NAME_CASE(UNSHFLW) + NODE_NAME_CASE(BCOMPRESS) + NODE_NAME_CASE(BCOMPRESSW) + NODE_NAME_CASE(BDECOMPRESS) + NODE_NAME_CASE(BDECOMPRESSW) NODE_NAME_CASE(VMV_V_X_VL) NODE_NAME_CASE(VFMV_V_F_VL) NODE_NAME_CASE(VMV_X_S) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -33,6 +33,10 @@ def riscv_shflw : SDNode<"RISCVISD::SHFLW", SDT_RISCVIntBinOpW>; def riscv_unshfl : SDNode<"RISCVISD::UNSHFL", SDTIntBinOp>; def riscv_unshflw: SDNode<"RISCVISD::UNSHFLW",SDT_RISCVIntBinOpW>; +def riscv_bcompress : SDNode<"RISCVISD::BCOMPRESS", SDTIntBinOp>; +def riscv_bcompressw : SDNode<"RISCVISD::BCOMPRESSW", SDT_RISCVIntBinOpW>; +def riscv_bdecompress : SDNode<"RISCVISD::BDECOMPRESS", SDTIntBinOp>; +def riscv_bdecompressw : SDNode<"RISCVISD::BDECOMPRESSW", SDT_RISCVIntBinOpW>; def UImmLog2XLenHalfAsmOperand : AsmOperandClass { let Name = "UImmLog2XLenHalf"; @@ -934,6 +938,16 @@ def : PatGprGpr; } // Predicates = [HasStdExtZbc] +let Predicates = [HasStdExtZbe] in { +def : PatGprGpr<riscv_bcompress, BCOMPRESS>; +def : PatGprGpr<riscv_bdecompress, BDECOMPRESS>; +} // Predicates = [HasStdExtZbe] + +let Predicates = [HasStdExtZbe, IsRV64] in { +def : PatGprGpr<riscv_bcompressw, BCOMPRESSW>; +def : PatGprGpr<riscv_bdecompressw, BDECOMPRESSW>; +} // Predicates = [HasStdExtZbe, IsRV64] + let Predicates = [HasStdExtZbr] in { def : PatGpr; def : PatGpr; diff --git a/llvm/test/CodeGen/RISCV/rv32zbe-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbe-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32zbe-intrinsic.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbe -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBE + +declare i32 
@llvm.riscv.bcompress.i32(i32 %a, i32 %b) + +define i32 @bcompress32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: bcompress32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bcompress a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBE-LABEL: bcompress32: +; RV32IBE: # %bb.0: +; RV32IBE-NEXT: bcompress a0, a0, a1 +; RV32IBE-NEXT: ret + %tmp = call i32 @llvm.riscv.bcompress.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.bdecompress.i32(i32 %a, i32 %b) + +define i32 @bdecompress32(i32 %a, i32 %b) nounwind { +; RV32IB-LABEL: bdecompress32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bdecompress a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBE-LABEL: bdecompress32: +; RV32IBE: # %bb.0: +; RV32IBE-NEXT: bdecompress a0, a0, a1 +; RV32IBE-NEXT: ret + %tmp = call i32 @llvm.riscv.bdecompress.i32(i32 %a, i32 %b) + ret i32 %tmp +} diff --git a/llvm/test/CodeGen/RISCV/rv64zbe-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbe-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64zbe-intrinsic.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbe -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBE + +declare i32 @llvm.riscv.bcompress.i32(i32 %a, i32 %b) + +define signext i32 @bcompress32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: bcompress32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: bcompressw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBE-LABEL: bcompress32: +; RV64IBE: # %bb.0: +; RV64IBE-NEXT: bcompressw a0, a0, a1 +; RV64IBE-NEXT: ret + %tmp = call i32 @llvm.riscv.bcompress.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i32 @llvm.riscv.bdecompress.i32(i32 %a, i32 %b) + +define signext i32 @bdecompress32(i32 signext %a, i32 signext %b) nounwind { +; RV64IB-LABEL: bdecompress32: +; RV64IB: # %bb.0: +; 
RV64IB-NEXT: bdecompressw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBE-LABEL: bdecompress32: +; RV64IBE: # %bb.0: +; RV64IBE-NEXT: bdecompressw a0, a0, a1 +; RV64IBE-NEXT: ret + %tmp = call i32 @llvm.riscv.bdecompress.i32(i32 %a, i32 %b) + ret i32 %tmp +} + +declare i64 @llvm.riscv.bcompress.i64(i64 %a, i64 %b) + +define i64 @bcompress64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: bcompress64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: bcompress a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBE-LABEL: bcompress64: +; RV64IBE: # %bb.0: +; RV64IBE-NEXT: bcompress a0, a0, a1 +; RV64IBE-NEXT: ret + %tmp = call i64 @llvm.riscv.bcompress.i64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i64 @llvm.riscv.bdecompress.i64(i64 %a, i64 %b) + +define i64 @bdecompress64(i64 %a, i64 %b) nounwind { +; RV64IB-LABEL: bdecompress64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: bdecompress a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBE-LABEL: bdecompress64: +; RV64IBE: # %bb.0: +; RV64IBE-NEXT: bdecompress a0, a0, a1 +; RV64IBE-NEXT: ret + %tmp = call i64 @llvm.riscv.bdecompress.i64(i64 %a, i64 %b) + ret i64 %tmp +}