diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -162,7 +162,8 @@
   }
 
   if (Subtarget.hasStdExtZbp()) {
-    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
+    setOperationAction(ISD::BSWAP, XLenVT, Custom);
 
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
@@ -495,6 +496,20 @@
   }
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::BSWAP:
+  case ISD::BITREVERSE: {
+    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    // Start with the maximum immediate value which is the bitwidth - 1.
+    unsigned Imm = VT.getSizeInBits() - 1;
+    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+    if (Op.getOpcode() == ISD::BSWAP)
+      Imm &= ~0x7U;
+    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
+                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
+  }
   }
 }
 
@@ -1288,6 +1303,29 @@
   return SDValue();
 }
 
+static SDValue combineGREVI(SDNode *N, SelectionDAG &DAG,
+                            const RISCVSubtarget &Subtarget) {
+  // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
+  // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
+  uint64_t ShAmt1 = N->getConstantOperandVal(1);
+  SDValue GREVSrc = N->getOperand(0);
+
+  if (GREVSrc->getOpcode() != N->getOpcode())
+    return SDValue();
+
+  uint64_t ShAmt2 = GREVSrc.getConstantOperandVal(1);
+  GREVSrc = GREVSrc->getOperand(0);
+
+  uint64_t CombinedShAmt = ShAmt1 ^ ShAmt2;
+  if (CombinedShAmt == 0)
+    return GREVSrc;
+
+  SDLoc DL(N);
+  return DAG.getNode(
+      N->getOpcode(), DL, N->getValueType(0), GREVSrc,
+      DAG.getTargetConstant(CombinedShAmt, DL, Subtarget.getXLenVT()));
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -1383,6 +1421,11 @@
       DCI.AddToWorklist(N);
       return SDValue(N, 0);
     }
+
+    if (N->getOpcode() == RISCVISD::GREVIW)
+      if (SDValue V = combineGREVI(N, DCI.DAG, Subtarget))
+        return V;
+
     break;
   }
   case RISCVISD::FMV_X_ANYEXTW_RV64: {
@@ -1415,23 +1458,8 @@
     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                        DAG.getConstant(~SignBit, DL, MVT::i64));
   }
-  case RISCVISD::GREVI: {
-    // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
-    // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
-    SDLoc DL(N);
-    auto GREVSrc = N->getOperand(0);
-    uint64_t ShAmt1 = N->getConstantOperandVal(1);
-    if (GREVSrc->getOpcode() != RISCVISD::GREVI)
-      break;
-    uint64_t ShAmt2 = GREVSrc.getConstantOperandVal(1);
-    GREVSrc = GREVSrc->getOperand(0);
-    uint64_t CombinedShAmt = ShAmt1 ^ ShAmt2;
-    if (CombinedShAmt == 0)
-      return GREVSrc;
-    return DAG.getNode(
-        RISCVISD::GREVI, DL, N->getValueType(0), GREVSrc,
-        DAG.getTargetConstant(CombinedShAmt, DL, Subtarget.getXLenVT()));
-  }
+  case RISCVISD::GREVI:
+    return combineGREVI(N, DCI.DAG, Subtarget);
   case ISD::OR:
     if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
       return GREV;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -730,17 +730,10 @@
 } // Predicates = [HasStdExtZbp]
 
 let Predicates = [HasStdExtZbp, IsRV32] in {
-def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, 8)>;
-def : Pat<(rotl (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, 8)>;
-def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
-def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
+def : Pat<(rotr (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
+def : Pat<(rotl (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
 } // Predicates = [HasStdExtZbp, IsRV32]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
-def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbt] in {
 def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
           (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv32Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -1849,6 +1849,175 @@
   ret i32 %2
 }
 
+define i32 @bitreverse_bswap_i32(i32 %a) {
+; RV32I-LABEL: bitreverse_bswap_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: lui a4, 4080
+; RV32I-NEXT: and a3, a3, a4
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: lui a3, 986895
+; RV32I-NEXT: addi a3, a3, 240
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: lui a3, 838861
+; RV32I-NEXT: addi a3, a3, -820
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: lui a1, 349525
+; RV32I-NEXT: addi a1, a1, 1365
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: lui a3, 699051
+; RV32I-NEXT: addi a3, a3, -1366
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a2, a0, 24
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: and a2, a2, a4
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: bitreverse_bswap_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rev.b a0, a0
+; RV32IB-NEXT: ret
+;
+; RV32IBP-LABEL: bitreverse_bswap_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rev.b a0, a0
+; RV32IBP-NEXT: ret
+  %1 = call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = call i32 @llvm.bswap.i32(i32 %1)
+  ret i32 %2
+}
+
+define i64 @bitreverse_bswap_i64(i64 %a) {
+; RV32I-LABEL: bitreverse_bswap_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a3, a1, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi t1, a2, -256
+; RV32I-NEXT: and a3, a3, t1
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: or a4, a3, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: lui a6, 4080
+; RV32I-NEXT: and a5, a5, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a7, a4, -241
+; RV32I-NEXT: and a5, a1, a7
+; RV32I-NEXT: slli a5, a5, 4
+; RV32I-NEXT: lui a3, 986895
+; RV32I-NEXT: addi t0, a3, 240
+; RV32I-NEXT: and a1, a1, t0
+; RV32I-NEXT: srli a1, a1, 4
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: addi t2, a5, 819
+; RV32I-NEXT: and a4, a1, t2
+; RV32I-NEXT: slli a4, a4, 2
+; RV32I-NEXT: lui a3, 838861
+; RV32I-NEXT: addi t3, a3, -820
+; RV32I-NEXT: and a1, a1, t3
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: lui a4, 349525
+; RV32I-NEXT: addi a4, a4, 1365
+; RV32I-NEXT: and a2, a1, a4
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: lui a5, 699051
+; RV32I-NEXT: addi a5, a5, -1366
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, a7
+; RV32I-NEXT: slli a2, a2, 4
+; RV32I-NEXT: and a0, a0, t0
+; RV32I-NEXT: srli a0, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, t2
+; RV32I-NEXT: slli a2, a2, 2
+; RV32I-NEXT: and a0, a0, t3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: and a2, a0, a4
+; RV32I-NEXT: slli a2, a2, 1
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a0, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: and a2, a2, t1
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: slli a3, a1, 8
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: bitreverse_bswap_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rev.b a0, a0
+; RV32IB-NEXT: rev.b a1, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBP-LABEL: bitreverse_bswap_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rev.b a0, a0
+; RV32IBP-NEXT: rev.b a1, a1
+; RV32IBP-NEXT: ret
+  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+  %2 = call i64 @llvm.bswap.i64(i64 %1)
+  ret i64 %2
+}
+
 define i32 @shfl1_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: shfl1_i32:
 ; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -2377,6 +2377,262 @@
   ret i32 %2
 }
 
+define i32 @bitreverse_bswap_i32(i32 %a) {
+; RV64I-LABEL: bitreverse_bswap_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: lui a2, 4080
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: srli a2, a0, 8
+; RV64I-NEXT: addi a3, zero, 255
+; RV64I-NEXT: slli a4, a3, 24
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: srli a2, a0, 40
+; RV64I-NEXT: lui a4, 16
+; RV64I-NEXT: addiw a4, a4, -256
+; RV64I-NEXT: and a2, a2, a4
+; RV64I-NEXT: srli a4, a0, 56
+; RV64I-NEXT: or a2, a2, a4
+; RV64I-NEXT: or a4, a1, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: slli a2, a3, 32
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: slli a5, a0, 24
+; RV64I-NEXT: slli a6, a3, 40
+; RV64I-NEXT: and a5, a5, a6
+; RV64I-NEXT: or a5, a5, a2
+; RV64I-NEXT: slli a1, a0, 40
+; RV64I-NEXT: slli a2, a3, 48
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 4
+; RV64I-NEXT: lui a3, 1044721
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 240
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: lui a3, 1035469
+; RV64I-NEXT: addiw a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -820
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: lui a1, 21845
+; RV64I-NEXT: addiw a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 1365
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: lui a3, 1026731
+; RV64I-NEXT: addiw a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1366
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 24
+; RV64I-NEXT: addi a3, zero, -1
+; RV64I-NEXT: slli a3, a3, 56
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: and a3, a3, a6
+; RV64I-NEXT: srli a4, a0, 24
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: bitreverse_bswap_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: greviw a0, a0, 7
+; RV64IB-NEXT: ret
+;
+; RV64IBP-LABEL: bitreverse_bswap_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: greviw a0, a0, 7
+; RV64IBP-NEXT: ret
+  %1 = call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = call i32 @llvm.bswap.i32(i32 %1)
+  ret i32 %2
+}
+
+define i64 @bitreverse_bswap_i64(i64 %a) {
+; RV64I-LABEL: bitreverse_bswap_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: lui a6, 4080
+; RV64I-NEXT: and a1, a1, a6
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: addi a5, zero, 255
+; RV64I-NEXT: slli a7, a5, 24
+; RV64I-NEXT: and a3, a3, a7
+; RV64I-NEXT: or a3, a3, a1
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw a1, a1, -256
+; RV64I-NEXT: and a4, a4, a1
+; RV64I-NEXT: srli a2, a0, 56
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: slli a4, a0, 8
+; RV64I-NEXT: slli t0, a5, 32
+; RV64I-NEXT: and a3, a4, t0
+; RV64I-NEXT: slli a4, a0, 24
+; RV64I-NEXT: slli t1, a5, 40
+; RV64I-NEXT: and a4, a4, t1
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: slli a4, a0, 40
+; RV64I-NEXT: slli a5, a5, 48
+; RV64I-NEXT: and a4, a4, a5
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 3855
+; RV64I-NEXT: addiw a2, a2, 241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 241
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 4
+; RV64I-NEXT: lui a3, 1044721
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, 240
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 4
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 13107
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 2
+; RV64I-NEXT: lui a3, 1035469
+; RV64I-NEXT: addiw a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -819
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -820
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 1
+; RV64I-NEXT: lui a3, 1026731
+; RV64I-NEXT: addiw a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1365
+; RV64I-NEXT: slli a3, a3, 12
+; RV64I-NEXT: addi a3, a3, -1366
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: srli a2, a0, 40
+; RV64I-NEXT: and a1, a2, a1
+; RV64I-NEXT: srli a2, a0, 56
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srli a2, a0, 24
+; RV64I-NEXT: and a2, a2, a6
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: and a3, a3, a7
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: and a2, a2, t0
+; RV64I-NEXT: slli a3, a0, 24
+; RV64I-NEXT: and a3, a3, t1
+; RV64I-NEXT: or a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 40
+; RV64I-NEXT: and a3, a3, a5
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: bitreverse_bswap_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rev.b a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBP-LABEL: bitreverse_bswap_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rev.b a0, a0
+; RV64IBP-NEXT: ret
+  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+  %2 = call i64 @llvm.bswap.i64(i64 %1)
+  ret i64 %2
+}
+
 ; There's no [un]shfliw instruction as slliu.w occupies the encoding slot that
 ; would be occupied by shfliw.