diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -108,6 +108,8 @@
   BCOMPRESSW,
   BDECOMPRESS,
   BDECOMPRESSW,
+  // Packed SIMD Extension
+  BITREV,
   // Vector Extension
   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
   // for the VL value to be used for the operation.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -839,6 +839,7 @@
     }
 
     setOperationAction(ISD::BSWAP, XLenVT, Legal);
+    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
 
     // Expand all truncating stores and extending loads.
     for (MVT VT0 : MVT::vector_valuetypes()) {
@@ -2021,11 +2022,16 @@
   case ISD::BSWAP:
   case ISD::BITREVERSE: {
     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
-    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
+    assert((Subtarget.hasStdExtZbp() || Subtarget.hasStdExtP()) &&
+           "Unexpected custom legalisation");
     MVT VT = Op.getSimpleValueType();
     SDLoc DL(Op);
     // Start with the maximum immediate value which is the bitwidth - 1.
     unsigned Imm = VT.getSizeInBits() - 1;
+    // If RVP is enabled, lower BITREVERSE to the custom BITREV node.
+    if (Subtarget.hasStdExtP() && Op.getOpcode() == ISD::BITREVERSE)
+      return DAG.getNode(RISCVISD::BITREV, DL, VT, Op.getOperand(0),
+                         DAG.getConstant(Imm, DL, VT));
     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
     if (Op.getOpcode() == ISD::BSWAP)
       Imm &= ~0x7U;
@@ -7951,6 +7957,7 @@
   NODE_NAME_CASE(BCOMPRESSW)
   NODE_NAME_CASE(BDECOMPRESS)
   NODE_NAME_CASE(BDECOMPRESSW)
+  NODE_NAME_CASE(BITREV)
   NODE_NAME_CASE(VMV_V_X_VL)
   NODE_NAME_CASE(VFMV_V_F_VL)
   NODE_NAME_CASE(VMV_X_S)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -17,6 +17,8 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
 
+def riscv_bitrev : SDNode<"RISCVISD::BITREV", SDT_RISCVIntBinOp>;
+
 def uimm3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<3>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<3>;
   let DecoderMethod = "decodeUImmOperand<3>";
@@ -1268,3 +1270,8 @@
 let Predicates = [HasStdExtZpn, IsRV64] in
 def : Pat<(bswap i64:$rs1), (PKBT32 (SWAP16 (SWAP8 GPR:$rs1)),
                                     (SWAP16 (SWAP8 GPR:$rs1)))>;
+
+// bitreverse
+let Predicates = [HasStdExtZpn] in
+def : Pat<(riscv_bitrev GPR:$rs1, uimmlog2xlen:$imm),
+          (BITREVI GPR:$rs1, uimmlog2xlen:$imm)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll b/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll
@@ -12,40 +12,14 @@
 define i8 @bitreverse8(i8 %a) nounwind {
 ; RV32-LABEL: bitreverse8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    andi a1, a0, 15
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    andi a0, a0, 240
-; RV32-NEXT:    srli a0, a0, 4
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    andi a1, a0, 51
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    andi a0, a0, 204
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    andi a1, a0, 85
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    andi a0, a0, 170
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    bitrevi a0, a0, 31
+; RV32-NEXT:    srli a0, a0, 24
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitreverse8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    andi a1, a0, 15
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    andi a0, a0, 240
-; RV64-NEXT:    srli a0, a0, 4
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    andi a1, a0, 51
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    andi a0, a0, 204
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    andi a1, a0, 85
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    andi a0, a0, 170
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    bitrevi a0, a0, 63
+; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    ret
   %ret = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %ret
@@ -54,65 +28,14 @@
 define i16 @bitreverse16(i16 %a) nounwind {
 ; RV32-LABEL: bitreverse16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    swap8 a0, a0
-; RV32-NEXT:    swap16 a0, a0
-; RV32-NEXT:    srli a1, a0, 12
-; RV32-NEXT:    lui a2, 15
-; RV32-NEXT:    addi a2, a2, 240
-; RV32-NEXT:    and a1, a1, a2
-; RV32-NEXT:    srli a0, a0, 20
-; RV32-NEXT:    andi a0, a0, -241
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    lui a1, 3
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    and a1, a0, a1
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    lui a2, 13
-; RV32-NEXT:    addi a2, a2, -820
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    lui a1, 5
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    and a1, a0, a1
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    lui a2, 11
-; RV32-NEXT:    addi a2, a2, -1366
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    bitrevi a0, a0, 31
+; RV32-NEXT:    srli a0, a0, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitreverse16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    swap8 a0, a0
-; RV64-NEXT:    swap16 a0, a0
-; RV64-NEXT:    pkbt32 a0, a0, a0
-; RV64-NEXT:    srli a1, a0, 44
-; RV64-NEXT:    lui a2, 15
-; RV64-NEXT:    addiw a2, a2, 240
-; RV64-NEXT:    and a1, a1, a2
-; RV64-NEXT:    srli a0, a0, 52
-; RV64-NEXT:    andi a0, a0, -241
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 3
-; RV64-NEXT:    addiw a1, a1, 819
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    lui a2, 13
-; RV64-NEXT:    addiw a2, a2, -820
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 5
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    lui a2, 11
-; RV64-NEXT:    addiw a2, a2, -1366
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    bitrevi a0, a0, 63
+; RV64-NEXT:    srli a0, a0, 48
 ; RV64-NEXT:    ret
   %ret = call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %ret
@@ -121,71 +44,13 @@
 define i32 @bitreverse32(i32 %a) nounwind {
 ; RV32-LABEL: bitreverse32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    swap8 a0, a0
-; RV32-NEXT:    swap16 a0, a0
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    and a1, a0, a1
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    lui a2, 986895
-; RV32-NEXT:    addi a2, a2, 240
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    srli a0, a0, 4
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    and a1, a0, a1
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    lui a2, 838861
-; RV32-NEXT:    addi a2, a2, -820
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    and a1, a0, a1
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1366
-; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    bitrevi a0, a0, 31
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitreverse32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    swap8 a0, a0
-; RV64-NEXT:    swap16 a0, a0
-; RV64-NEXT:    pkbt32 a0, a0, a0
-; RV64-NEXT:    srli a1, a0, 28
-; RV64-NEXT:    lui a2, 241
-; RV64-NEXT:    addiw a2, a2, -241
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, 240
-; RV64-NEXT:    and a1, a1, a2
-; RV64-NEXT:    srli a0, a0, 36
-; RV64-NEXT:    lui a2, 61681
-; RV64-NEXT:    addiw a2, a2, -241
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 209715
-; RV64-NEXT:    addiw a1, a1, 819
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    lui a2, 838861
-; RV64-NEXT:    addiw a2, a2, -820
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    lui a2, 699051
-; RV64-NEXT:    addiw a2, a2, -1366
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    bitrevi a0, a0, 63
+; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    ret
   %ret = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %ret
@@ -194,123 +59,14 @@
 define i64 @bitreverse64(i64 %a) nounwind {
 ; RV32-LABEL: bitreverse64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    swap8 a1, a1
-; RV32-NEXT:    swap16 a1, a1
-; RV32-NEXT:    lui a2, 61681
-; RV32-NEXT:    addi a6, a2, -241
-; RV32-NEXT:    and a2, a1, a6
-; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    lui a4, 986895
-; RV32-NEXT:    addi t0, a4, 240
-; RV32-NEXT:    and a1, a1, t0
-; RV32-NEXT:    srli a1, a1, 4
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    lui a2, 209715
-; RV32-NEXT:    addi t1, a2, 819
-; RV32-NEXT:    and a2, a1, t1
-; RV32-NEXT:    slli a2, a2, 2
-; RV32-NEXT:    lui a3, 838861
-; RV32-NEXT:    addi a3, a3, -820
-; RV32-NEXT:    and a1, a1, a3
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    lui a2, 349525
-; RV32-NEXT:    addi a2, a2, 1365
-; RV32-NEXT:    and a4, a1, a2
-; RV32-NEXT:    slli a4, a4, 1
-; RV32-NEXT:    lui a5, 699051
-; RV32-NEXT:    addi a5, a5, -1366
-; RV32-NEXT:    and a1, a1, a5
-; RV32-NEXT:    srli a1, a1, 1
-; RV32-NEXT:    or a7, a1, a4
-; RV32-NEXT:    swap8 a0, a0
-; RV32-NEXT:    swap16 a0, a0
-; RV32-NEXT:    and a1, a0, a6
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    and a0, a0, t0
-; RV32-NEXT:    srli a0, a0, 4
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    and a1, a0, t1
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    and a0, a0, a3
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    and a1, a0, a2
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    and a0, a0, a5
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    mv a0, a7
+; RV32-NEXT:    bitrevi a2, a1, 31
+; RV32-NEXT:    bitrevi a1, a0, 31
+; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bitreverse64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    swap8 a0, a0
-; RV64-NEXT:    swap16 a0, a0
-; RV64-NEXT:    pkbt32 a0, a0, a0
-; RV64-NEXT:    lui a1, 3855
-; RV64-NEXT:    addiw a1, a1, 241
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, -241
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 241
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, -241
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 4
-; RV64-NEXT:    lui a2, 1044721
-; RV64-NEXT:    addiw a2, a2, -241
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, 241
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -241
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, 240
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 4
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 13107
-; RV64-NEXT:    addiw a1, a1, 819
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 819
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 819
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 819
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    lui a2, 1035469
-; RV64-NEXT:    addiw a2, a2, -819
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -819
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -819
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -820
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    lui a1, 21845
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 1365
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 1365
-; RV64-NEXT:    slli a1, a1, 12
-; RV64-NEXT:    addi a1, a1, 1365
-; RV64-NEXT:    and a1, a0, a1
-; RV64-NEXT:    slli a1, a1, 1
-; RV64-NEXT:    lui a2, 1026731
-; RV64-NEXT:    addiw a2, a2, -1365
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -1365
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -1365
-; RV64-NEXT:    slli a2, a2, 12
-; RV64-NEXT:    addi a2, a2, -1366
-; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    srli a0, a0, 1
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    bitrevi a0, a0, 63
 ; RV64-NEXT:    ret
   %ret = call i64 @llvm.bitreverse.i64(i64 %a)
   ret i64 %ret
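
Note on the bitrevi semantics the new pattern relies on: the lowering always emits the maximum immediate (XLEN - 1, i.e. 31 on RV32 and 63 on RV64), so bitrevi acts as a full-register bit reverse, and narrower bitreverse intrinsics are finished with a single srli fix-up, as the updated CHECK lines show. Below is a minimal C++ model of that behaviour, written from what the tests exercise rather than from the P-extension specification text; the helper names are hypothetical, and zeroing the bits above the reversed field is an assumption that the patch never exercises, since it only uses imm = XLEN - 1.

#include <cstdint>

// Model of "bitrevi rd, rs1, imm" as this patch uses it: reverse bit
// positions 0..imm of rs1. The patch only emits imm = XLEN - 1, which
// reverses the whole register; the treatment of bits above the field
// for smaller immediates is an assumption here (left as zero).
static uint64_t bitrevi(uint64_t rs1, unsigned imm) {
  uint64_t out = 0;
  for (unsigned i = 0; i <= imm; ++i)
    out |= ((rs1 >> i) & 1) << (imm - i);
  return out;
}

// The RV32 sequence from the updated bitreverse8 test:
//   bitrevi a0, a0, 31   (reversed byte now sits in bits 31:24)
//   srli    a0, a0, 24   (shift it back down to bits 7:0)
static uint32_t bitreverse8_rv32(uint32_t a0) {
  a0 = static_cast<uint32_t>(bitrevi(a0, 31));
  return a0 >> 24;
}

The i64-on-RV32 case follows the same shape: each half is reversed with bitrevi ..., 31 and the two halves swap registers, which is why that test still needs the trailing mv a0, a2.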