diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -99,6 +99,8 @@ // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target // (returns (Lo, Hi)). It takes a chain operand. READ_CYCLE_WIDE, + // Reverse bits in each byte. + BREV8, // Generalized Reverse and Generalized Or-Combine - directly matching the // semantics of the named RISC-V instructions. Lowered as custom nodes as // TableGen chokes when faced with commutative permutations in deeply-nested diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -282,6 +282,9 @@ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) ? Legal : Expand); + // Zbkb can use rev8+brev8 to implement bitreverse. + setOperationAction(ISD::BITREVERSE, XLenVT, + Subtarget.hasStdExtZbkb() ? Custom : Expand); } if (Subtarget.hasStdExtZbb()) { @@ -2955,17 +2958,23 @@ return LowerINTRINSIC_VOID(Op, DAG); case ISD::BSWAP: case ISD::BITREVERSE: { - // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining. - assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - // Start with the maximum immediate value which is the bitwidth - 1. - unsigned Imm = VT.getSizeInBits() - 1; - // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. - if (Op.getOpcode() == ISD::BSWAP) - Imm &= ~0x7U; - return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), - DAG.getConstant(Imm, DL, VT)); + if (Subtarget.hasStdExtZbp()) { + // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining. + // Start with the maximum immediate value which is the bitwidth - 1. + unsigned Imm = VT.getSizeInBits() - 1; + // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 
+ if (Op.getOpcode() == ISD::BSWAP) + Imm &= ~0x7U; + return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), + DAG.getConstant(Imm, DL, VT)); + } + assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); + assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); + // Expand bitreverse to a bswap(rev8) followed by brev8. + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); + return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); } case ISD::FSHL: case ISD::FSHR: { @@ -10058,6 +10067,7 @@ NODE_NAME_CASE(STRICT_FCVT_W_RV64) NODE_NAME_CASE(STRICT_FCVT_WU_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) + NODE_NAME_CASE(BREV8) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) NODE_NAME_CASE(GORC) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -43,6 +43,7 @@ def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDT_RISCVIntShiftDOpW>; def riscv_fsl : SDNode<"RISCVISD::FSL", SDTIntShiftDOp>; def riscv_fsr : SDNode<"RISCVISD::FSR", SDTIntShiftDOp>; +def riscv_brev8 : SDNode<"RISCVISD::BREV8", SDTIntUnaryOp>; def riscv_grev : SDNode<"RISCVISD::GREV", SDTIntBinOp>; def riscv_grevw : SDNode<"RISCVISD::GREVW", SDT_RISCVIntBinOpW>; def riscv_gorc : SDNode<"RISCVISD::GORC", SDTIntBinOp>; @@ -1190,6 +1191,7 @@ def : PatGprGpr; let Predicates = [HasStdExtZbkb] in { +def : PatGpr<riscv_brev8, BREV8>; def : PatGpr; } // Predicates = [HasStdExtZbkb] diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -4,13 +4,13 @@ ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: | FileCheck %s -check-prefixes=RV32ZB,RV32ZBB ; RUN: llc 
-mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=RV64ZB +; RUN: | FileCheck %s -check-prefixes=RV64ZB,RV64ZBB ; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: | FileCheck %s -check-prefixes=RV32ZB,RV32ZBKB ; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=RV64ZB +; RUN: | FileCheck %s -check-prefixes=RV64ZB,RV64ZBKB declare i16 @llvm.bswap.i16(i16) declare i32 @llvm.bswap.i32(i32) @@ -212,43 +212,57 @@ ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_i8: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: andi a1, a0, 15 -; RV32ZB-NEXT: slli a1, a1, 4 -; RV32ZB-NEXT: slli a0, a0, 24 -; RV32ZB-NEXT: srli a0, a0, 28 -; RV32ZB-NEXT: or a0, a0, a1 -; RV32ZB-NEXT: andi a1, a0, 51 -; RV32ZB-NEXT: slli a1, a1, 2 -; RV32ZB-NEXT: srli a0, a0, 2 -; RV32ZB-NEXT: andi a0, a0, 51 -; RV32ZB-NEXT: or a0, a0, a1 -; RV32ZB-NEXT: andi a1, a0, 85 -; RV32ZB-NEXT: slli a1, a1, 1 -; RV32ZB-NEXT: srli a0, a0, 1 -; RV32ZB-NEXT: andi a0, a0, 85 -; RV32ZB-NEXT: or a0, a0, a1 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi a1, a0, 15 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: slli a0, a0, 24 +; RV32ZBB-NEXT: srli a0, a0, 28 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: andi a1, a0, 51 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: srli a0, a0, 2 +; RV32ZBB-NEXT: andi a0, a0, 51 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: andi a1, a0, 85 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: srli a0, a0, 1 +; RV32ZBB-NEXT: andi a0, a0, 85 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_i8: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: andi a1, a0, 15 -; RV64ZB-NEXT: slli a1, a1, 4 -; RV64ZB-NEXT: slli a0, a0, 56 -; RV64ZB-NEXT: srli a0, a0, 60 -; RV64ZB-NEXT: or a0, a0, a1 -; RV64ZB-NEXT: andi a1, a0, 51 -; 
RV64ZB-NEXT: slli a1, a1, 2 -; RV64ZB-NEXT: srli a0, a0, 2 -; RV64ZB-NEXT: andi a0, a0, 51 -; RV64ZB-NEXT: or a0, a0, a1 -; RV64ZB-NEXT: andi a1, a0, 85 -; RV64ZB-NEXT: slli a1, a1, 1 -; RV64ZB-NEXT: srli a0, a0, 1 -; RV64ZB-NEXT: andi a0, a0, 85 -; RV64ZB-NEXT: or a0, a0, a1 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a1, a0, 15 +; RV64ZBB-NEXT: slli a1, a1, 4 +; RV64ZBB-NEXT: slli a0, a0, 56 +; RV64ZBB-NEXT: srli a0, a0, 60 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: andi a1, a0, 51 +; RV64ZBB-NEXT: slli a1, a1, 2 +; RV64ZBB-NEXT: srli a0, a0, 2 +; RV64ZBB-NEXT: andi a0, a0, 51 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: andi a1, a0, 85 +; RV64ZBB-NEXT: slli a1, a1, 1 +; RV64ZBB-NEXT: srli a0, a0, 1 +; RV64ZBB-NEXT: andi a0, a0, 85 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_i8: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 24 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_i8: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 56 +; RV64ZBKB-NEXT: ret %tmp = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %tmp } @@ -312,57 +326,71 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_i16: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a1, a0, 12 -; RV32ZB-NEXT: lui a2, 15 -; RV32ZB-NEXT: addi a2, a2, 240 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: srli a0, a0, 20 -; RV32ZB-NEXT: andi a0, a0, -241 -; RV32ZB-NEXT: or a0, a0, a1 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 3 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 5 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; 
RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 12 +; RV32ZBB-NEXT: lui a2, 15 +; RV32ZBB-NEXT: addi a2, a2, 240 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: srli a0, a0, 20 +; RV32ZBB-NEXT: andi a0, a0, -241 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_i16: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a1, a0, 44 -; RV64ZB-NEXT: lui a2, 15 -; RV64ZB-NEXT: addiw a2, a2, 240 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: srli a0, a0, 52 -; RV64ZB-NEXT: andi a0, a0, -241 -; RV64ZB-NEXT: or a0, a0, a1 -; RV64ZB-NEXT: srli a1, a0, 2 -; RV64ZB-NEXT: lui a2, 3 -; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 5 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 44 +; RV64ZBB-NEXT: lui a2, 15 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 52 +; RV64ZBB-NEXT: andi a0, a0, -241 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: srli a1, a0, 2 +; 
RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_i16: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 16 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_i16: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 48 +; RV64ZBKB-NEXT: ret %tmp = call i16 @llvm.bitreverse.i16(i16 %a) ret i16 %tmp } @@ -442,60 +470,73 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_i32: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: lui a2, 61681 -; RV32ZB-NEXT: addi a2, a2, -241 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 209715 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 349525 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 
+; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_i32: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a1, a0, 36 -; RV64ZB-NEXT: lui a2, 61681 -; RV64ZB-NEXT: addiw a2, a2, -241 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: srli a0, a0, 28 -; RV64ZB-NEXT: lui a2, 986895 -; RV64ZB-NEXT: addiw a2, a2, 240 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: sext.w a0, a0 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 2 -; RV64ZB-NEXT: lui a2, 209715 -; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 349525 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 36 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 28 +; RV64ZBB-NEXT: lui a2, 986895 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: 
srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_i32: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_i32: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 32 +; RV64ZBKB-NEXT: ret %tmp = call i32 @llvm.bitreverse.i32(i32 %a) ret i32 %tmp } @@ -616,78 +657,94 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_i64: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a1, a1 -; RV32ZB-NEXT: srli a2, a1, 4 -; RV32ZB-NEXT: lui a3, 61681 -; RV32ZB-NEXT: addi a3, a3, -241 -; RV32ZB-NEXT: and a2, a2, a3 -; RV32ZB-NEXT: and a1, a1, a3 -; RV32ZB-NEXT: slli a1, a1, 4 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 2 -; RV32ZB-NEXT: lui a4, 209715 -; RV32ZB-NEXT: addi a4, a4, 819 -; RV32ZB-NEXT: and a2, a2, a4 -; RV32ZB-NEXT: and a1, a1, a4 -; RV32ZB-NEXT: slli a1, a1, 2 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 1 -; RV32ZB-NEXT: lui a5, 349525 -; RV32ZB-NEXT: addi a5, a5, 1365 -; RV32ZB-NEXT: and a2, a2, a5 -; RV32ZB-NEXT: and a1, a1, a5 -; RV32ZB-NEXT: slli a1, a1, 1 -; RV32ZB-NEXT: or a2, a2, a1 -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: and a1, a1, a3 -; RV32ZB-NEXT: and a0, a0, a3 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: and a1, a1, a4 -; RV32ZB-NEXT: and a0, a0, a4 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: and a1, a1, a5 -; RV32ZB-NEXT: and a0, a0, a5 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a1, a1, a0 -; RV32ZB-NEXT: mv a0, a2 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: 
test_bitreverse_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a1, a1 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: lui a3, 61681 +; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a2, a2, a1 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a1, a1, a0 +; RV32ZBB-NEXT: mv a0, a2 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_i64: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: lui a1, %hi(.LCPI6_0) -; RV64ZB-NEXT: ld a1, %lo(.LCPI6_0)(a1) -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a2, a0, 4 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI6_1) -; RV64ZB-NEXT: ld a1, %lo(.LCPI6_1)(a1) -; RV64ZB-NEXT: slli a0, a0, 4 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 2 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI6_2) -; RV64ZB-NEXT: ld a1, %lo(.LCPI6_2)(a1) -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 1 -; RV64ZB-NEXT: and a2, a2, a1 -; 
RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_0)(a1) +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_i64: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a1, a1 +; RV32ZBKB-NEXT: brev8 a2, a1 +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a1, a0 +; RV32ZBKB-NEXT: mv a0, a2 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_i64: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: ret %tmp = call i64 @llvm.bitreverse.i64(i64 %a) ret i64 %tmp } +; FIXME: Merge away the two rev8s in the Zbkb code. 
define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind { ; RV32I-LABEL: test_bswap_bitreverse_i16: ; RV32I: # %bb.0: @@ -739,60 +796,79 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bswap_bitreverse_i16: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: lui a2, 1 -; RV32ZB-NEXT: addi a2, a2, -241 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 3 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 5 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bswap_bitreverse_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 1 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bswap_bitreverse_i16: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: srli a1, a0, 4 -; RV64ZB-NEXT: lui a2, 1 -; RV64ZB-NEXT: addiw a2, a2, -241 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 4 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 2 -; RV64ZB-NEXT: lui a2, 3 
-; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 5 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bswap_bitreverse_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 1 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bswap_bitreverse_i16: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 16 +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 16 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bswap_bitreverse_i16: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 48 +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 48 +; RV64ZBKB-NEXT: ret %tmp = call i16 @llvm.bswap.i16(i16 %a) %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp) ret i16 %tmp2 } +; FIXME: Merge away the two rev8s in the Zbkb code. 
define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bswap_bitreverse_i32: ; RV32I: # %bb.0: @@ -844,55 +920,69 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bswap_bitreverse_i32: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: lui a2, 61681 -; RV32ZB-NEXT: addi a2, a2, -241 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 209715 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 349525 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bswap_bitreverse_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bswap_bitreverse_i32: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: srli a1, a0, 4 -; RV64ZB-NEXT: lui a2, 61681 -; RV64ZB-NEXT: addiw a2, a2, -241 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 4 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, 
a0, 2 -; RV64ZB-NEXT: lui a2, 209715 -; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 349525 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bswap_bitreverse_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 4 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bswap_bitreverse_i32: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bswap_bitreverse_i32: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 32 +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 32 +; RV64ZBKB-NEXT: ret %tmp = call i32 @llvm.bswap.i32(i32 %a) %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp) ret i32 %tmp2 @@ -964,75 +1054,87 @@ ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bswap_bitreverse_i64: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a2, a0, 4 -; RV32ZB-NEXT: lui a3, 61681 -; RV32ZB-NEXT: addi a3, a3, -241 -; RV32ZB-NEXT: and a2, a2, a3 -; RV32ZB-NEXT: and a0, a0, a3 -; RV32ZB-NEXT: slli a0, 
a0, 4 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a0, 2 -; RV32ZB-NEXT: lui a4, 209715 -; RV32ZB-NEXT: addi a4, a4, 819 -; RV32ZB-NEXT: and a2, a2, a4 -; RV32ZB-NEXT: and a0, a0, a4 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a0, 1 -; RV32ZB-NEXT: lui a5, 349525 -; RV32ZB-NEXT: addi a5, a5, 1365 -; RV32ZB-NEXT: and a2, a2, a5 -; RV32ZB-NEXT: and a0, a0, a5 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a1, 4 -; RV32ZB-NEXT: and a2, a2, a3 -; RV32ZB-NEXT: and a1, a1, a3 -; RV32ZB-NEXT: slli a1, a1, 4 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 2 -; RV32ZB-NEXT: and a2, a2, a4 -; RV32ZB-NEXT: and a1, a1, a4 -; RV32ZB-NEXT: slli a1, a1, 2 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 1 -; RV32ZB-NEXT: and a2, a2, a5 -; RV32ZB-NEXT: and a1, a1, a5 -; RV32ZB-NEXT: slli a1, a1, 1 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bswap_bitreverse_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: lui a3, 61681 +; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; 
RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bswap_bitreverse_i64: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: lui a1, %hi(.LCPI9_0) -; RV64ZB-NEXT: ld a1, %lo(.LCPI9_0)(a1) -; RV64ZB-NEXT: srli a2, a0, 4 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI9_1) -; RV64ZB-NEXT: ld a1, %lo(.LCPI9_1)(a1) -; RV64ZB-NEXT: slli a0, a0, 4 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 2 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI9_2) -; RV64ZB-NEXT: ld a1, %lo(.LCPI9_2)(a1) -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 1 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bswap_bitreverse_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_0)(a1) +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bswap_bitreverse_i64: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a1, a1 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: 
test_bswap_bitreverse_i64: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: ret %tmp = call i64 @llvm.bswap.i64(i64 %a) %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp) ret i64 %tmp2 } +; FIXME: Merge away the two rev8s in the Zbkb code. define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { ; RV32I-LABEL: test_bitreverse_bswap_i16: ; RV32I: # %bb.0: @@ -1084,60 +1186,79 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_bswap_i16: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: lui a2, 1 -; RV32ZB-NEXT: addi a2, a2, -241 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 3 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 5 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_bswap_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 1 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_bswap_i16: -; RV64ZB: # %bb.0: -; 
RV64ZB-NEXT: srli a1, a0, 4 -; RV64ZB-NEXT: lui a2, 1 -; RV64ZB-NEXT: addiw a2, a2, -241 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 4 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 2 -; RV64ZB-NEXT: lui a2, 3 -; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 5 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_bswap_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 1 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_bswap_i16: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 16 +; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: srli a0, a0, 16 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_bswap_i16: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 48 +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 48 +; RV64ZBKB-NEXT: ret %tmp = call i16 @llvm.bitreverse.i16(i16 %a) %tmp2 = call i16 @llvm.bswap.i16(i16 
%tmp) ret i16 %tmp2 } +; FIXME: Merge away the two rev8s in the Zbkb code. define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bitreverse_bswap_i32: ; RV32I: # %bb.0: @@ -1189,55 +1310,69 @@ ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_bswap_i32: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a1, a0, 4 -; RV32ZB-NEXT: lui a2, 61681 -; RV32ZB-NEXT: addi a2, a2, -241 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 2 -; RV32ZB-NEXT: lui a2, 209715 -; RV32ZB-NEXT: addi a2, a2, 819 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: srli a1, a0, 1 -; RV32ZB-NEXT: lui a2, 349525 -; RV32ZB-NEXT: addi a2, a2, 1365 -; RV32ZB-NEXT: and a1, a1, a2 -; RV32ZB-NEXT: and a0, a0, a2 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a1, a0 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_bswap_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_bswap_i32: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: srli a1, a0, 4 -; RV64ZB-NEXT: lui a2, 61681 -; RV64ZB-NEXT: addiw a2, a2, -241 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -;
RV64ZB-NEXT: slliw a0, a0, 4 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 2 -; RV64ZB-NEXT: lui a2, 209715 -; RV64ZB-NEXT: addiw a2, a2, 819 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 2 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: srli a1, a0, 1 -; RV64ZB-NEXT: lui a2, 349525 -; RV64ZB-NEXT: addiw a2, a2, 1365 -; RV64ZB-NEXT: and a1, a1, a2 -; RV64ZB-NEXT: and a0, a0, a2 -; RV64ZB-NEXT: slliw a0, a0, 1 -; RV64ZB-NEXT: or a0, a1, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_bswap_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 4 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_bswap_i32: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: brev8 a0, a0 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_bswap_i32: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 32 +; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: srli a0, a0, 32 +; RV64ZBKB-NEXT: ret %tmp = call i32 @llvm.bitreverse.i32(i32 %a) %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp) ret i32 %tmp2 @@ -1309,70 +1444,81 @@ ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: ret ; -; RV32ZB-LABEL: test_bitreverse_bswap_i64: -; RV32ZB: # %bb.0: -; RV32ZB-NEXT: srli a2, a0, 4 -; RV32ZB-NEXT: lui a3, 61681 -; RV32ZB-NEXT: addi a3, a3, -241 
-; RV32ZB-NEXT: and a2, a2, a3 -; RV32ZB-NEXT: and a0, a0, a3 -; RV32ZB-NEXT: slli a0, a0, 4 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a0, 2 -; RV32ZB-NEXT: lui a4, 209715 -; RV32ZB-NEXT: addi a4, a4, 819 -; RV32ZB-NEXT: and a2, a2, a4 -; RV32ZB-NEXT: and a0, a0, a4 -; RV32ZB-NEXT: slli a0, a0, 2 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a0, 1 -; RV32ZB-NEXT: lui a5, 349525 -; RV32ZB-NEXT: addi a5, a5, 1365 -; RV32ZB-NEXT: and a2, a2, a5 -; RV32ZB-NEXT: and a0, a0, a5 -; RV32ZB-NEXT: slli a0, a0, 1 -; RV32ZB-NEXT: or a0, a2, a0 -; RV32ZB-NEXT: srli a2, a1, 4 -; RV32ZB-NEXT: and a2, a2, a3 -; RV32ZB-NEXT: and a1, a1, a3 -; RV32ZB-NEXT: slli a1, a1, 4 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 2 -; RV32ZB-NEXT: and a2, a2, a4 -; RV32ZB-NEXT: and a1, a1, a4 -; RV32ZB-NEXT: slli a1, a1, 2 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: srli a2, a1, 1 -; RV32ZB-NEXT: and a2, a2, a5 -; RV32ZB-NEXT: and a1, a1, a5 -; RV32ZB-NEXT: slli a1, a1, 1 -; RV32ZB-NEXT: or a1, a2, a1 -; RV32ZB-NEXT: ret +; RV32ZBB-LABEL: test_bitreverse_bswap_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: lui a3, 61681 +; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: and a2, 
a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: ret ; -; RV64ZB-LABEL: test_bitreverse_bswap_i64: -; RV64ZB: # %bb.0: -; RV64ZB-NEXT: lui a1, %hi(.LCPI12_0) -; RV64ZB-NEXT: ld a1, %lo(.LCPI12_0)(a1) -; RV64ZB-NEXT: srli a2, a0, 4 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI12_1) -; RV64ZB-NEXT: ld a1, %lo(.LCPI12_1)(a1) -; RV64ZB-NEXT: slli a0, a0, 4 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 2 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: lui a1, %hi(.LCPI12_2) -; RV64ZB-NEXT: ld a1, %lo(.LCPI12_2)(a1) -; RV64ZB-NEXT: slli a0, a0, 2 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: srli a2, a0, 1 -; RV64ZB-NEXT: and a2, a2, a1 -; RV64ZB-NEXT: and a0, a0, a1 -; RV64ZB-NEXT: slli a0, a0, 1 -; RV64ZB-NEXT: or a0, a2, a0 -; RV64ZB-NEXT: ret +; RV64ZBB-LABEL: test_bitreverse_bswap_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_0)(a1) +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: ret +; +; RV32ZBKB-LABEL: test_bitreverse_bswap_i64: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: brev8 a0, a0 +; 
RV32ZBKB-NEXT: brev8 a1, a1 +; RV32ZBKB-NEXT: ret +; +; RV64ZBKB-LABEL: test_bitreverse_bswap_i64: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: brev8 a0, a0 +; RV64ZBKB-NEXT: ret %tmp = call i64 @llvm.bitreverse.i64(i64 %a) %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp) ret i64 %tmp2