diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9707,6 +9707,17 @@ return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); } + // fold (bswap(srl (bswap c), x)) -> (shl c, x) + if (N0->getOpcode() == ISD::SRL && N0.hasOneUse()) { + auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (ShAmt && ShAmt->getZExtValue() % 8 == 0) { + SDValue BSwap = N0->getOperand(0); + if (BSwap->getOpcode() == ISD::BSWAP && BSwap.hasOneUse()) + return DAG.getNode(ISD::SHL, DL, VT, BSwap->getOperand(0), + N0->getOperand(1)); + } + } + // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2)))))) // iff x >= bw/2 (i.e. lower half is known zero) unsigned BW = VT.getScalarSizeInBits(); diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -848,18 +848,14 @@ ; ; RV32ZBKB-LABEL: test_bswap_bitreverse_i16: ; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: rev8 a0, a0 -; RV32ZBKB-NEXT: srli a0, a0, 16 -; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: slli a0, a0, 16 ; RV32ZBKB-NEXT: brev8 a0, a0 ; RV32ZBKB-NEXT: srli a0, a0, 16 ; RV32ZBKB-NEXT: ret ; ; RV64ZBKB-LABEL: test_bswap_bitreverse_i16: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: rev8 a0, a0 -; RV64ZBKB-NEXT: srli a0, a0, 48 -; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: slli a0, a0, 48 ; RV64ZBKB-NEXT: brev8 a0, a0 ; RV64ZBKB-NEXT: srli a0, a0, 48 ; RV64ZBKB-NEXT: ret @@ -977,9 +973,7 @@ ; ; RV64ZBKB-LABEL: test_bswap_bitreverse_i32: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: rev8 a0, a0 -; RV64ZBKB-NEXT: srli a0, a0, 32 -; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: slli a0, a0, 32 ; RV64ZBKB-NEXT: brev8 a0, a0 ; RV64ZBKB-NEXT: srli a0, a0, 32 ; RV64ZBKB-NEXT: ret @@ -1238,18 +1232,14 @@ ; ; RV32ZBKB-LABEL: test_bitreverse_bswap_i16: ; 
RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: rev8 a0, a0 -; RV32ZBKB-NEXT: srli a0, a0, 16 -; RV32ZBKB-NEXT: rev8 a0, a0 +; RV32ZBKB-NEXT: slli a0, a0, 16 ; RV32ZBKB-NEXT: brev8 a0, a0 ; RV32ZBKB-NEXT: srli a0, a0, 16 ; RV32ZBKB-NEXT: ret ; ; RV64ZBKB-LABEL: test_bitreverse_bswap_i16: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: rev8 a0, a0 -; RV64ZBKB-NEXT: srli a0, a0, 48 -; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: slli a0, a0, 48 ; RV64ZBKB-NEXT: brev8 a0, a0 ; RV64ZBKB-NEXT: srli a0, a0, 48 ; RV64ZBKB-NEXT: ret @@ -1367,9 +1357,7 @@ ; ; RV64ZBKB-LABEL: test_bitreverse_bswap_i32: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: rev8 a0, a0 -; RV64ZBKB-NEXT: srli a0, a0, 32 -; RV64ZBKB-NEXT: rev8 a0, a0 +; RV64ZBKB-NEXT: slli a0, a0, 32 ; RV64ZBKB-NEXT: brev8 a0, a0 ; RV64ZBKB-NEXT: srli a0, a0, 32 ; RV64ZBKB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll b/llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bswap-srli-bswap.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZB +; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZB + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +define i16 @test_bswap_srli_bswap_i16(i16 %a) nounwind { +; RV32I-LABEL: test_bswap_srli_bswap_i16: +; RV32I: # %bb.0: +; 
RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: andi a1, a1, -256 +; RV32I-NEXT: srli a0, a0, 15 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_srli_bswap_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: andi a1, a1, -256 +; RV64I-NEXT: srli a0, a0, 15 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZB-LABEL: test_bswap_srli_bswap_i16: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 23 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_bswap_i16: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 55 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = lshr i16 %1, 7 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i16 @test_bswap_srli_bswap_i16_1(i16 %a) nounwind { +; RV32I-LABEL: test_bswap_srli_bswap_i16_1: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_srli_bswap_i16_1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: ret +; +; RV32ZB-LABEL: test_bswap_srli_bswap_i16_1: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: slli a0, a0, 8 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_bswap_i16_1: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: slli a0, a0, 8 +; RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = lshr i16 %1, 8 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i32 @test_bswap_srli_bswap_i32(i32 %a) nounwind { +; RV32I-LABEL: test_bswap_srli_bswap_i32: +; 
RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_srli_bswap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slliw a0, a0, 24 +; RV64I-NEXT: ret +; +; RV32ZB-LABEL: test_bswap_srli_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: slli a0, a0, 24 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: slliw a0, a0, 24 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 24 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i64 @test_bswap_srli_bswap_i64(i64 %a) nounwind { +; RV32I-LABEL: test_bswap_srli_bswap_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_srli_bswap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV32ZB-LABEL: test_bswap_srli_bswap_i64: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: slli a1, a0, 16 +; RV32ZB-NEXT: li a0, 0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_bswap_i64: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: slli a0, a0, 48 +; RV64ZB-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + %2 = lshr i64 %1, 48 + %3 = call i64 @llvm.bswap.i64(i64 %2) + ret i64 %3 +} diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll --- a/llvm/test/CodeGen/X86/combine-bswap.ll +++ b/llvm/test/CodeGen/X86/combine-bswap.ll @@ -38,6 +38,25 @@ ret i32 %c } +; fold (bswap(srl (bswap c), x)) -> (shl c, x) +define i32 @test_bswap_srli_bswap_i32(i32 %a) nounwind { +; X86-LABEL: test_bswap_srli_bswap_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $24, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_srli_bswap_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $24, %eax +; X64-NEXT: retq + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 24 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + define i32 @test_demandedbits_bswap(i32 
%a0) nounwind { ; X86-LABEL: test_demandedbits_bswap: ; X86: # %bb.0: