diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9753,6 +9753,21 @@
     }
   }
 
+  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+  // inverse-shift-of-bswap:
+  // bswap (X u<< C) --> (bswap X) u>> C
+  // bswap (X u>> C) --> (bswap X) u<< C
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+      N0.hasOneUse()) {
+    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+        ShAmt->getZExtValue() % 8 == 0) {
+      SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+      unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
+      return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -653,8 +653,7 @@
 ; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    ldrh w8, [x0, #2]
 ; CHECK-NEXT:    mov x19, x1
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    bl gid_tbl_len
 ; CHECK-NEXT:    strh wzr, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
--- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
@@ -442,8 +442,7 @@
 ; CHECK-LABEL: zext_load_i32_by_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    ret
   %tmp = bitcast i32* %arg to i8*
   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -499,8 +499,7 @@
 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    ret
 
   %tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -825,22 +825,19 @@
 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
 ; CHECK-ARMv6:       @ %bb.0:
 ; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
-; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
-; CHECK-ARMv6-NEXT:    rev r0, r0
+; CHECK-ARMv6-NEXT:    rev16 r0, r0
 ; CHECK-ARMv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
 ; CHECK-THUMBv6:       @ %bb.0:
 ; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT:    rev r0, r0
+; CHECK-THUMBv6-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
 ; CHECK-THUMBv7:       @ %bb.0:
 ; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT:    rev r0, r0
+; CHECK-THUMBv7-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv7-NEXT:    bx lr
 
   %tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -875,22 +875,19 @@
 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-ARMv6:       @ %bb.0:
 ; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
-; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
-; CHECK-ARMv6-NEXT:    rev r0, r0
+; CHECK-ARMv6-NEXT:    rev16 r0, r0
 ; CHECK-ARMv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-THUMBv6:       @ %bb.0:
 ; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT:    rev r0, r0
+; CHECK-THUMBv6-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-THUMBv7:       @ %bb.0:
 ; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT:    rev r0, r0
+; CHECK-THUMBv7-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv7-NEXT:    bx lr
 
   %tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/RISCV/bswap-shift.ll b/llvm/test/CodeGen/RISCV/bswap-shift.ll
--- a/llvm/test/CodeGen/RISCV/bswap-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-shift.ll
@@ -41,16 +41,12 @@
 define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, 255
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
+; RV32ZB-NEXT:    slli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, 255
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
+; RV64ZB-NEXT:    slli a0, a0, 8
 ; RV64ZB-NEXT:    ret
   %1 = call i16 @llvm.bswap.i16(i16 %a)
   %2 = lshr i16 %1, 8
@@ -61,17 +57,12 @@
 define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 8
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 40
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 8
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = lshr i32 %1, 8
@@ -82,17 +73,12 @@
 define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 16
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 16
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = lshr i32 %1, 16
@@ -103,15 +89,12 @@
 define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, 255
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, 255
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 24
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = lshr i32 %1, 24
@@ -122,17 +105,13 @@
 define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a1, a0
+; RV32ZB-NEXT:    slli a1, a0, 16
 ; RV32ZB-NEXT:    li a0, 0
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
+; RV64ZB-NEXT:    slli a0, a0, 48
 ; RV64ZB-NEXT:    ret
   %1 = call i64 @llvm.bswap.i64(i64 %a)
   %2 = lshr i64 %1, 48
@@ -167,16 +146,14 @@
 define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, -256
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
+; RV32ZB-NEXT:    slli a0, a0, 16
+; RV32ZB-NEXT:    srli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, -256
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
+; RV64ZB-NEXT:    slli a0, a0, 48
+; RV64ZB-NEXT:    srli a0, a0, 56
 ; RV64ZB-NEXT:    ret
   %1 = call i16 @llvm.bswap.i16(i16 %a)
   %2 = shl i16 %1, 8
@@ -187,18 +164,12 @@
 define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    slli a0, a0, 8
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 24
-; RV64ZB-NEXT:    andi a0, a0, -256
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 8
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = shl i32 %1, 8
@@ -209,18 +180,12 @@
 define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    slli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 16
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
-; RV64ZB-NEXT:    slli a0, a0, 16
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 16
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = shl i32 %1, 16
@@ -231,17 +196,12 @@
 define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    lui a1, 1044480
-; RV32ZB-NEXT:    and a0, a0, a1
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    lui a1, 1044480
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 24
 ; RV64ZB-NEXT:    ret
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = shl i32 %1, 24
@@ -252,17 +212,13 @@
 define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a1
-; RV32ZB-NEXT:    slli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a1, 16
 ; RV32ZB-NEXT:    li a1, 0
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    slli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
+; RV64ZB-NEXT:    srli a0, a0, 48
 ; RV64ZB-NEXT:    ret
   %1 = call i64 @llvm.bswap.i64(i64 %a)
   %2 = shl i64 %1, 48
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -39,19 +39,18 @@
   ret i32 %c
 }
 
-; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x)
 define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
 ; X86-LABEL: test_bswap_srli_8_bswap_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_8_bswap_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $8, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %1 = call i16 @llvm.bswap.i16(i16 %a)
@@ -64,17 +63,13 @@
 ; X86-LABEL: test_bswap_srli_8_bswap_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shrl $8, %eax
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_8_bswap_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    bswapl %eax
+; X64-NEXT:    shll $8, %eax
 ; X64-NEXT:    retq
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = lshr i32 %1, 8
@@ -87,20 +82,13 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shrdl $16, %eax, %edx
-; X86-NEXT:    shrl $16, %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shll $16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_16_bswap_i64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    shrq $16, %rax
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    shlq $16, %rax
 ; X64-NEXT:    retq
   %1 = call i64 @llvm.bswap.i64(i64 %a)
   %2 = lshr i64 %1, 16
@@ -108,21 +96,17 @@
   ret i64 %3
 }
 
-; TODO: fold (bswap(shl (bswap c), x)) -> (srl c, x)
 define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
 ; X86-LABEL: test_bswap_shli_8_bswap_i16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    rolw $8, %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_8_bswap_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $65280, %eax # imm = 0xFF00
-; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    movzbl %ah, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %1 = call i16 @llvm.bswap.i16(i16 %a)
@@ -135,17 +119,13 @@
 ; X86-LABEL: test_bswap_shli_8_bswap_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_8_bswap_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    shll $8, %eax
-; X64-NEXT:    bswapl %eax
+; X64-NEXT:    shrl $8, %eax
 ; X64-NEXT:    retq
   %1 = call i32 @llvm.bswap.i32(i32 %a)
   %2 = shl i32 %1, 8
@@ -157,21 +137,13 @@
 ; X86-LABEL: test_bswap_shli_16_bswap_i64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shldl $16, %ecx, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    movzwl %cx, %edx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_16_bswap_i64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    shlq $16, %rax
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    shrq $16, %rax
 ; X64-NEXT:    retq
   %1 = call i64 @llvm.bswap.i64(i64 %a)
   %2 = shl i64 %1, 16
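
Note (illustration only, not part of the patch): the identity the combine relies on can be sanity-checked in isolation. The sketch below is my own; the bswap32 helper, file name, and sample values are assumptions, not code from this change. It verifies that for a shift amount C that is a whole multiple of 8 and less than the bit width, bswap(x << C) == bswap(x) >> C and bswap(x >> C) == bswap(x) << C, which is exactly the rewrite the DAGCombiner hunk performs.

  // check_bswap_shift.cpp -- standalone sanity check, hypothetical helper.
  #include <cassert>
  #include <cstdint>

  // Reference byte swap for a 32-bit value.
  static uint32_t bswap32(uint32_t X) {
    return ((X & 0x000000FFu) << 24) | ((X & 0x0000FF00u) << 8) |
           ((X & 0x00FF0000u) >> 8) | ((X & 0xFF000000u) >> 24);
  }

  int main() {
    const uint32_t Samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
    for (uint32_t X : Samples) {
      // Only whole-byte, in-range shift amounts qualify, mirroring the
      // ShAmt->getZExtValue() % 8 == 0 and ult(BW) guards in the combine.
      for (unsigned C = 0; C < 32; C += 8) {
        // bswap (X u<< C) --> (bswap X) u>> C
        assert(bswap32(X << C) == (bswap32(X) >> C));
        // bswap (X u>> C) --> (bswap X) u<< C
        assert(bswap32(X >> C) == (bswap32(X) << C));
      }
    }
    return 0;
  }

The byte-multiple restriction is what makes the rewrite sound: bswap permutes whole bytes, so it commutes with a logical shift only when the shift moves data by whole bytes. The hasOneUse() check keeps the transform from duplicating the shift when it has other users, and moving the shift after the bswap lets later combines simplify bswap(bswap(x)) pairs, which is where the rev16/slli/shll improvements in the tests come from.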