diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9375,6 +9375,15 @@
   // fold (bswap (bswap x)) -> x
   if (N0.getOpcode() == ISD::BSWAP)
     return N0->getOperand(0);
+
+  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). This helps
+  // when bitreverse gets expanded.
+  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
+    SDLoc DL(N);
+    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
+  }
+
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -1039,10 +1039,6 @@
 define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 24
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 1
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -1064,17 +1060,10 @@
 ; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 8
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srli a0, a0, 56
-; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -1096,21 +1085,17 @@
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 8
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a1, a0, 12
-; RV32ZBB-NEXT:    lui a2, 15
-; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
 ; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    srli a0, a0, 20
-; RV32ZBB-NEXT:    andi a0, a0, -241
-; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 2
 ; RV32ZBB-NEXT:    lui a2, 3
 ; RV32ZBB-NEXT:    addi a2, a2, 819
@@ -1125,20 +1110,17 @@
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a0, a0, 16
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 44
-; RV64ZBB-NEXT:    lui a2, 15
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 52
-; RV64ZBB-NEXT:    andi a0, a0, -241
-; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 3
 ; RV64ZBB-NEXT:    addiw a2, a2, 819
@@ -1153,8 +1135,6 @@
 ; RV64ZBB-NEXT:    and a0, a0, a2
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 48
 ; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
   %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
@@ -1164,99 +1144,56 @@
 define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 4
 ; RV32ZBB-NEXT:    lui a2, 61681
 ; RV32ZBB-NEXT:    addi a2, a2, -241
@@ -1278,21 +1215,16 @@
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    srli a1, a0, 4
 ; RV64ZBB-NEXT:    lui a2, 61681
 ; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 28
-; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    slliw a0, a0, 4
 ; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 209715
@@ -1306,10 +1238,8 @@
 ; RV64ZBB-NEXT:    addiw a2, a2, 1365
 ; RV64ZBB-NEXT:    and a1, a1, a2
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    slliw a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
   %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
@@ -1319,206 +1249,113 @@
 define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI12_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI12_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a1, a1
-; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    srli a2, a0, 4
 ; RV32ZBB-NEXT:    lui a3, 61681
 ; RV32ZBB-NEXT:    addi a3, a3, -241
 ; RV32ZBB-NEXT:    and a2, a2, a3
-; RV32ZBB-NEXT:    and a1, a1, a3
-; RV32ZBB-NEXT:    slli a1, a1, 4
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 2
-; RV32ZBB-NEXT:    lui a4, 209715
-; RV32ZBB-NEXT:    addi a4, a4, 819
-; RV32ZBB-NEXT:    and a2, a2, a4
-; RV32ZBB-NEXT:    and a1, a1, a4
-; RV32ZBB-NEXT:    slli a1, a1, 2
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 1
-; RV32ZBB-NEXT:    lui a5, 349525
-; RV32ZBB-NEXT:    addi a5, a5, 1365
-; RV32ZBB-NEXT:    and a2, a2, a5
-; RV32ZBB-NEXT:    and a1, a1, a5
-; RV32ZBB-NEXT:    slli a1, a1, 1
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a2, a0, 4
-; RV32ZBB-NEXT:    and a2, a2, a3
 ; RV32ZBB-NEXT:    and a0, a0, a3
 ; RV32ZBB-NEXT:    slli a0, a0, 4
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
 ; RV32ZBB-NEXT:    and a2, a2, a4
 ; RV32ZBB-NEXT:    and a0, a0, a4
 ; RV32ZBB-NEXT:    slli a0, a0, 2
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
 ; RV32ZBB-NEXT:    and a2, a2, a5
 ; RV32ZBB-NEXT:    and a0, a0, a5
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a2, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_0)
 ; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    srli a2, a0, 4
 ; RV64ZBB-NEXT:    and a2, a2, a1
 ; RV64ZBB-NEXT:    and a0, a0, a1
@@ -1538,7 +1375,6 @@
 ; RV64ZBB-NEXT:    and a0, a0, a1
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a2, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
   %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -2404,48 +2404,27 @@
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i32:
@@ -2460,81 +2439,42 @@
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2351,48 +2351,27 @@
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i32:
@@ -2407,76 +2386,27 @@
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI68_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI68_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i64:
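
Why the new combine is a pure canonicalization: bswap and bitreverse are both bit permutations, and they commute. For an N-byte value, either composition sends bit 8*b+k to bit 8*b+(7-k), so reordering the two operations never changes the result; it only lets the generic BITREVERSE expansion start with a byte swap that folds with the adjacent bswap, which is why the rev8/shift pairs disappear from the checks above. Below is a standalone C++ sanity check of that identity; it is not part of the patch, bitreverse64 is a hand-rolled helper written for this sketch, and only the GCC/Clang builtin __builtin_bswap64 is assumed from the compiler.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Reverse all 64 bits with the usual shift-and-mask ladder (the same shape
// the expanded ISD::BITREVERSE lowering uses): swap halves at widths
// 32, 16, 8, 4, 2, and 1.
static uint64_t bitreverse64(uint64_t x) {
  x = (x >> 32) | (x << 32);
  x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x & 0x0000FFFF0000FFFFULL) << 16);
  x = ((x & 0xFF00FF00FF00FF00ULL) >> 8) | ((x & 0x00FF00FF00FF00FFULL) << 8);
  x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4) | ((x & 0x0F0F0F0F0F0F0F0FULL) << 4);
  x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2) | ((x & 0x3333333333333333ULL) << 2);
  x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1) | ((x & 0x5555555555555555ULL) << 1);
  return x;
}

int main() {
  const uint64_t Tests[] = {0, 1, 0x0123456789ABCDEFULL, ~0ULL,
                            0x8000000000000001ULL};
  // bswap(bitreverse(x)) must equal bitreverse(bswap(x)) for the
  // DAGCombiner canonicalization to be value-preserving.
  for (uint64_t X : Tests)
    assert(__builtin_bswap64(bitreverse64(X)) ==
           bitreverse64(__builtin_bswap64(X)));
  std::puts("bswap and bitreverse commute on all samples");
  return 0;
}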