Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9724,6 +9724,17 @@ return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); } + // fold (bswap(srl (bswap c), 8*x)) -> (shl c, 8*x) + if (N0->getOpcode() == ISD::SRL && N0.hasOneUse()) { + auto *ShAmt = dyn_cast(N0.getOperand(1)); + if (ShAmt && ShAmt->getZExtValue() % 8 == 0) { + SDValue BSwap = N0->getOperand(0); + if (BSwap->getOpcode() == ISD::BSWAP && BSwap.hasOneUse()) + return DAG.getNode(ISD::SHL, DL, VT, BSwap->getOperand(0), + N0->getOperand(1)); + } + } + // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2)))))) // iff x >= bw/2 (i.e. lower half is known zero) unsigned BW = VT.getScalarSizeInBits(); Index: llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll =================================================================== --- llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll +++ llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll @@ -69,28 +69,22 @@ define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind { ; RV32I-LABEL: test_bswap_srli_8_bswap_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a0, a0, 255 ; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_srli_8_bswap_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 255 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16: ; RV32ZB: # %bb.0: -; RV32ZB-NEXT: andi a0, a0, 255 -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: slli a0, a0, 8 ; RV32ZB-NEXT: ret ; ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16: ; RV64ZB: # %bb.0: -; RV64ZB-NEXT: andi a0, a0, 255 -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: slli a0, a0, 8 ; RV64ZB-NEXT: ret %1 = call i16 @llvm.bswap.i16(i16 %a) %2 = lshr i16 %1, 8 @@ -101,65 +95,22 @@ define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind { ; RV32I-LABEL: 
test_bswap_srli_8_bswap_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: and a2, a0, a3 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: srli a0, a0, 16 -; RV32I-NEXT: andi a0, a0, -256 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_srli_8_bswap_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srliw a2, a0, 24 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 8 -; RV64I-NEXT: lui a3, 4080 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slliw a0, a0, 24 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 32 -; RV64I-NEXT: srli a1, a1, 32 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: and a2, a0, a3 -; RV64I-NEXT: slliw a1, a1, 24 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: srliw a0, a0, 16 -; RV64I-NEXT: andi a0, a0, -256 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: slliw a0, a0, 8 ; RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32: ; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a0, a0, 8 -; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: slli a0, a0, 8 ; RV32ZB-NEXT: ret ; ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32: ; RV64ZB: # %bb.0: -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 40 -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: slliw a0, a0, 8 ; RV64ZB-NEXT: ret %1 = call i32 @llvm.bswap.i32(i32 %a) %2 = lshr i32 %1, 8 @@ -170,61 +121,22 @@ define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind { ; 
RV32I-LABEL: test_bswap_srli_16_bswap_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_srli_16_bswap_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srliw a2, a0, 24 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 8 -; RV64I-NEXT: lui a3, 4080 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slliw a1, a1, 24 -; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: slliw a0, a0, 16 ; RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32: ; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a0, a0, 16 -; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: slli a0, a0, 16 ; RV32ZB-NEXT: ret ; ; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32: ; RV64ZB: # %bb.0: -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 48 -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: slliw a0, a0, 16 ; RV64ZB-NEXT: ret %1 = call i32 @llvm.bswap.i32(i32 %a) %2 = lshr i32 %1, 16 @@ -245,15 +157,12 @@ ; ; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32: ; RV32ZB: # %bb.0: -; RV32ZB-NEXT: andi a0, a0, 255 -; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: slli a0, a0, 24 ; RV32ZB-NEXT: ret ; ; 
RV64ZB-LABEL: test_bswap_srli_24_bswap_i32: ; RV64ZB: # %bb.0: -; RV64ZB-NEXT: andi a0, a0, 255 -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: slliw a0, a0, 24 ; RV64ZB-NEXT: ret %1 = call i32 @llvm.bswap.i32(i32 %a) %2 = lshr i32 %1, 24 @@ -264,76 +173,24 @@ define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind { ; RV32I-LABEL: test_bswap_srli_48_bswap_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: slli a1, a0, 16 ; RV32I-NEXT: li a0, 0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_srli_48_bswap_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: lui a2, 4080 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 8 -; RV64I-NEXT: li a3, 255 -; RV64I-NEXT: slli a4, a3, 24 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: srli a2, a0, 40 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addiw a4, a4, -256 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: srli a4, a0, 56 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 24 -; RV64I-NEXT: slli a4, a3, 40 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: slli a3, a3, 48 -; RV64I-NEXT: and a3, a4, a3 -; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: slli a1, a1, 56 -; RV64I-NEXT: srli a0, a0, 56 ; 
RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64: ; RV32ZB: # %bb.0: -; RV32ZB-NEXT: rev8 a0, a0 -; RV32ZB-NEXT: srli a0, a0, 16 -; RV32ZB-NEXT: rev8 a1, a0 +; RV32ZB-NEXT: slli a1, a0, 16 ; RV32ZB-NEXT: li a0, 0 ; RV32ZB-NEXT: ret ; ; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64: ; RV64ZB: # %bb.0: -; RV64ZB-NEXT: rev8 a0, a0 -; RV64ZB-NEXT: srli a0, a0, 48 -; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: slli a0, a0, 48 ; RV64ZB-NEXT: ret %1 = call i64 @llvm.bswap.i64(i64 %a) %2 = lshr i64 %1, 48 Index: llvm/test/CodeGen/X86/combine-bswap.ll =================================================================== --- llvm/test/CodeGen/X86/combine-bswap.ll +++ llvm/test/CodeGen/X86/combine-bswap.ll @@ -38,22 +38,18 @@ ret i32 %c } -; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x) +; fold (bswap(srl (bswap c), 8*x)) -> (shl c, 8*x) define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind { ; X86-LABEL: test_bswap_srli_8_bswap_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: bswapl %eax -; X86-NEXT: shrl $8, %eax -; X86-NEXT: bswapl %eax +; X86-NEXT: shll $8, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_bswap_srli_8_bswap_i32: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: bswapl %eax -; X64-NEXT: shrl $8, %eax -; X64-NEXT: bswapl %eax +; X64-NEXT: shll $8, %eax ; X64-NEXT: retq %1 = call i32 @llvm.bswap.i32(i32 %a) %2 = lshr i32 %1, 8