diff --git a/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll b/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/bitreverse.ll
@@ -0,0 +1,317 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64
+
+declare i8 @llvm.bitreverse.i8(i8)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i8 @bitreverse8(i8 %a) nounwind {
+; RV32-LABEL: bitreverse8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a1, a0, 15
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    andi a0, a0, 240
+; RV32-NEXT:    srli a0, a0, 4
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    andi a1, a0, 51
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    andi a0, a0, 204
+; RV32-NEXT:    srli a0, a0, 2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    andi a1, a0, 85
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    andi a0, a0, 170
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bitreverse8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a1, a0, 15
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    andi a0, a0, 240
+; RV64-NEXT:    srli a0, a0, 4
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    andi a1, a0, 51
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    andi a0, a0, 204
+; RV64-NEXT:    srli a0, a0, 2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    andi a1, a0, 85
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    andi a0, a0, 170
+; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %ret = call i8 @llvm.bitreverse.i8(i8 %a)
+  ret i8 %ret
+}
+
+define i16 @bitreverse16(i16 %a) nounwind {
+; RV32-LABEL: bitreverse16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    swap8 a0, a0
+; RV32-NEXT:    swap16 a0, a0
+; RV32-NEXT:    srli a1, a0, 12
+; RV32-NEXT:    lui a2, 15
+; RV32-NEXT:    addi a2, a2, 240
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    srli a0, a0, 20
+; RV32-NEXT:    andi a0, a0, -241
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    lui a1, 3
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    and a1, a0, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    lui a2, 13
+; RV32-NEXT:    addi a2, a2, -820
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a0, a0, 2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    lui a1, 5
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    and a1, a0, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    lui a2, 11
+; RV32-NEXT:    addi a2, a2, -1366
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bitreverse16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    swap8 a0, a0
+; RV64-NEXT:    swap16 a0, a0
+; RV64-NEXT:    pkbt32 a0, a0, a0
+; RV64-NEXT:    srli a1, a0, 44
+; RV64-NEXT:    lui a2, 15
+; RV64-NEXT:    addiw a2, a2, 240
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    srli a0, a0, 52
+; RV64-NEXT:    andi a0, a0, -241
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 3
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    lui a2, 13
+; RV64-NEXT:    addiw a2, a2, -820
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 5
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    lui a2, 11
+; RV64-NEXT:    addiw a2, a2, -1366
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %ret = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %ret
+}
+
+define i32 @bitreverse32(i32 %a) nounwind {
+; RV32-LABEL: bitreverse32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    swap8 a0, a0
+; RV32-NEXT:    swap16 a0, a0
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    and a1, a0, a1
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    lui a2, 986895
+; RV32-NEXT:    addi a2, a2, 240
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a0, a0, 4
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    lui a1, 209715
+; RV32-NEXT:    addi a1, a1, 819
+; RV32-NEXT:    and a1, a0, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    lui a2, 838861
+; RV32-NEXT:    addi a2, a2, -820
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a0, a0, 2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    lui a1, 349525
+; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    and a1, a0, a1
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    lui a2, 699051
+; RV32-NEXT:    addi a2, a2, -1366
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bitreverse32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    swap8 a0, a0
+; RV64-NEXT:    swap16 a0, a0
+; RV64-NEXT:    pkbt32 a0, a0, a0
+; RV64-NEXT:    srli a1, a0, 28
+; RV64-NEXT:    lui a2, 241
+; RV64-NEXT:    addiw a2, a2, -241
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, 240
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    srli a0, a0, 36
+; RV64-NEXT:    lui a2, 61681
+; RV64-NEXT:    addiw a2, a2, -241
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 209715
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    lui a2, 838861
+; RV64-NEXT:    addiw a2, a2, -820
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 349525
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    lui a2, 699051
+; RV64-NEXT:    addiw a2, a2, -1366
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %ret = call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %ret
+}
+
+define i64 @bitreverse64(i64 %a) nounwind {
+; RV32-LABEL: bitreverse64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    swap8 a1, a1
+; RV32-NEXT:    swap16 a1, a1
+; RV32-NEXT:    lui a2, 61681
+; RV32-NEXT:    addi a6, a2, -241
+; RV32-NEXT:    and a2, a1, a6
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    lui a4, 986895
+; RV32-NEXT:    addi t0, a4, 240
+; RV32-NEXT:    and a1, a1, t0
+; RV32-NEXT:    srli a1, a1, 4
+; RV32-NEXT:    or a1, a1, a2
+; RV32-NEXT:    lui a2, 209715
+; RV32-NEXT:    addi t1, a2, 819
+; RV32-NEXT:    and a2, a1, t1
+; RV32-NEXT:    slli a2, a2, 2
+; RV32-NEXT:    lui a3, 838861
+; RV32-NEXT:    addi a3, a3, -820
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    or a1, a1, a2
+; RV32-NEXT:    lui a2, 349525
+; RV32-NEXT:    addi a2, a2, 1365
+; RV32-NEXT:    and a4, a1, a2
+; RV32-NEXT:    slli a4, a4, 1
+; RV32-NEXT:    lui a5, 699051
+; RV32-NEXT:    addi a5, a5, -1366
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    or a7, a1, a4
+; RV32-NEXT:    swap8 a0, a0
+; RV32-NEXT:    swap16 a0, a0
+; RV32-NEXT:    and a1, a0, a6
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    and a0, a0, t0
+; RV32-NEXT:    srli a0, a0, 4
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    and a1, a0, t1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    srli a0, a0, 2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    and a1, a0, a2
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    and a0, a0, a5
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    or a1, a0, a1
+; RV32-NEXT:    mv a0, a7
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bitreverse64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    swap8 a0, a0
+; RV64-NEXT:    swap16 a0, a0
+; RV64-NEXT:    pkbt32 a0, a0, a0
+; RV64-NEXT:    lui a1, 3855
+; RV64-NEXT:    addiw a1, a1, 241
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, -241
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 241
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, -241
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    lui a2, 1044721
+; RV64-NEXT:    addiw a2, a2, -241
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, 241
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -241
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, 240
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 4
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 13107
+; RV64-NEXT:    addiw a1, a1, 819
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 819
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 819
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 819
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    lui a2, 1035469
+; RV64-NEXT:    addiw a2, a2, -819
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -819
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -819
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -820
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    lui a1, 21845
+; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 1365
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 1365
+; RV64-NEXT:    slli a1, a1, 12
+; RV64-NEXT:    addi a1, a1, 1365
+; RV64-NEXT:    and a1, a0, a1
+; RV64-NEXT:    slli a1, a1, 1
+; RV64-NEXT:    lui a2, 1026731
+; RV64-NEXT:    addiw a2, a2, -1365
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -1365
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -1365
+; RV64-NEXT:    slli a2, a2, 12
+; RV64-NEXT:    addi a2, a2, -1366
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    srli a0, a0, 1
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %ret = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %ret
+}