diff --git a/llvm/test/CodeGen/RISCV/bswap-shift.ll b/llvm/test/CodeGen/RISCV/bswap-shift.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/bswap-shift.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZB +; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32ZB +; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64ZB + +; TODO: These tests can be optimised, with x%8 == 0 +; fold (bswap(srl (bswap c), x)) -> (shl c, x) +; fold (bswap(shl (bswap c), x)) -> (srl c, x) + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +define i16 @test_bswap_srli_7_bswap_i16(i16 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_7_bswap_i16: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 23 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_7_bswap_i16: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 55 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = lshr i16 %1, 7 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: andi a0, a0, 255 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: andi a0, a0, 255 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; 
RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = lshr i16 %1, 8 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 8 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 40 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 8 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 16 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: andi a0, a0, 255 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: andi a0, a0, 255 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 24 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind { +; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: rev8 a1, a0 +; 
RV32ZB-NEXT: li a0, 0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + %2 = lshr i64 %1, 48 + %3 = call i64 @llvm.bswap.i64(i64 %2) + ret i64 %3 +} + +define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_7_bswap_i16: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 9 +; RV32ZB-NEXT: andi a0, a0, -128 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_7_bswap_i16: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 41 +; RV64ZB-NEXT: andi a0, a0, -128 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = shl i16 %1, 7 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: andi a0, a0, -256 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: srli a0, a0, 16 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: andi a0, a0, -256 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 48 +; RV64ZB-NEXT: ret + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = shl i16 %1, 8 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: slli a0, a0, 8 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 24 +; RV64ZB-NEXT: andi a0, a0, -256 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 
+; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = shl i32 %1, 8 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: slli a0, a0, 16 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: slli a0, a0, 16 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = shl i32 %1, 16 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: lui a1, 1044480 +; RV32ZB-NEXT: and a0, a0, a1 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: lui a1, 1044480 +; RV64ZB-NEXT: and a0, a0, a1 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: srli a0, a0, 32 +; RV64ZB-NEXT: ret + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = shl i32 %1, 24 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind { +; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64: +; RV32ZB: # %bb.0: +; RV32ZB-NEXT: rev8 a0, a1 +; RV32ZB-NEXT: slli a0, a0, 16 +; RV32ZB-NEXT: rev8 a0, a0 +; RV32ZB-NEXT: li a1, 0 +; RV32ZB-NEXT: ret +; +; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64: +; RV64ZB: # %bb.0: +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: slli a0, a0, 48 +; RV64ZB-NEXT: rev8 a0, a0 +; RV64ZB-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + %2 = shl i64 %1, 48 + %3 = call i64 @llvm.bswap.i64(i64 %2) + ret i64 %3 +} diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll --- 
a/llvm/test/CodeGen/X86/combine-bswap.ll +++ b/llvm/test/CodeGen/X86/combine-bswap.ll @@ -5,6 +5,7 @@ ; These tests just check that the plumbing is in place for @llvm.bswap. The ; actual output is massive at the moment as llvm.bswap is not yet legal. +declare i16 @llvm.bswap.i16(i16) readnone declare i32 @llvm.bswap.i32(i32) readnone declare i64 @llvm.bswap.i64(i64) readnone declare i32 @llvm.bswap.v4i32(i32) readnone @@ -38,6 +39,146 @@ ret i32 %c } +; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x) +define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind { +; X86-LABEL: test_bswap_srli_8_bswap_i16: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_srli_8_bswap_i16: +; X64: # %bb.0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: rolw $8, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = lshr i16 %1, 8 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind { +; X86-LABEL: test_bswap_srli_8_bswap_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: shrl $8, %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_srli_8_bswap_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: bswapl %eax +; X64-NEXT: shrl $8, %eax +; X64-NEXT: bswapl %eax +; X64-NEXT: retq + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = lshr i32 %1, 8 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind { +; X86-LABEL: test_bswap_srli_16_bswap_i64: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: bswapl %edx +; X86-NEXT: bswapl %eax +; X86-NEXT: shrdl $16, %eax, %edx +; X86-NEXT: shrl $16, %eax +; X86-NEXT: bswapl %edx +; X86-NEXT: 
bswapl %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_srli_16_bswap_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: shrq $16, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: retq + %1 = call i64 @llvm.bswap.i64(i64 %a) + %2 = lshr i64 %1, 16 + %3 = call i64 @llvm.bswap.i64(i64 %2) + ret i64 %3 +} + +; TODO: fold (bswap(shl (bswap c), x)) -> (srl c, x) +define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind { +; X86-LABEL: test_bswap_shli_8_bswap_i16: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $8, %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_shli_8_bswap_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl $65280, %eax # imm = 0xFF00 +; X64-NEXT: rolw $8, %ax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %1 = call i16 @llvm.bswap.i16(i16 %a) + %2 = shl i16 %1, 8 + %3 = call i16 @llvm.bswap.i16(i16 %2) + ret i16 %3 +} + +define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind { +; X86-LABEL: test_bswap_shli_8_bswap_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: shll $8, %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: retl +; +; X64-LABEL: test_bswap_shli_8_bswap_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: bswapl %eax +; X64-NEXT: shll $8, %eax +; X64-NEXT: bswapl %eax +; X64-NEXT: retq + %1 = call i32 @llvm.bswap.i32(i32 %a) + %2 = shl i32 %1, 8 + %3 = call i32 @llvm.bswap.i32(i32 %2) + ret i32 %3 +} + +define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind { +; X86-LABEL: test_bswap_shli_16_bswap_i64: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: bswapl %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: shldl $16, %ecx, %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: rolw $8, %cx +; X86-NEXT: movzwl %cx, %edx +; X86-NEXT: retl +; +; X64-LABEL: 
test_bswap_shli_16_bswap_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: shlq $16, %rax +; X64-NEXT: bswapq %rax +; X64-NEXT: retq + %1 = call i64 @llvm.bswap.i64(i64 %a) + %2 = shl i64 %1, 16 + %3 = call i64 @llvm.bswap.i64(i64 %2) + ret i64 %3 +} + define i32 @test_demandedbits_bswap(i32 %a0) nounwind { ; X86-LABEL: test_demandedbits_bswap: ; X86: # %bb.0: