diff --git a/llvm/test/CodeGen/RISCV/rvp/clo.ll b/llvm/test/CodeGen/RISCV/rvp/clo.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/clo.ll
@@ -0,0 +1,2248 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64
+
+define i8 @clo8(i8 %x) {
+; RV32-LABEL: clo8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: andi a1, a0, 255
+; RV32-NEXT: addi a2, zero, 255
+; RV32-NEXT: beq a1, a2, .LBB0_2
+; RV32-NEXT: # %bb.1: # %cond.false
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: addi a2, a2, 1365
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: and a2, a0, a1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -24
+; RV32-NEXT: j .LBB0_3
+; RV32-NEXT: .LBB0_2:
+; RV32-NEXT: addi a0, zero, 8
+; RV32-NEXT: .LBB0_3: # %cond.end
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: clo8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: andi a1, a0, 255
+; RV64-NEXT: addi a2, zero, 255
+; RV64-NEXT: beq a1, a2, .LBB0_2
+; RV64-NEXT: # %bb.1: # %cond.false
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: andi a0, a0, 255
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: lui a2, 21845
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: and a1, a1, a2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: lui a1, 13107
+; RV64-NEXT: addiw a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: and a2, a0, a1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT:
add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: j .LBB0_3 +; RV64-NEXT: .LBB0_2: +; RV64-NEXT: addi a0, zero, 8 +; RV64-NEXT: .LBB0_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %neg = xor i8 %x, -1 + %res = call i8 @llvm.ctlz.i8(i8 %neg, i1 false) + ret i8 %res +} + +define i16 @clo16(i16 %x) { +; RV32-LABEL: clo16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: beq a2, a1, .LBB1_2 +; RV32-NEXT: # %bb.1: # %cond.false +; RV32-NEXT: not a0, a0 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: j .LBB1_3 +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: addi a0, zero, 16 +; RV32-NEXT: .LBB1_3: # %cond.end +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: clo16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addiw a1, a1, -1 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: beq a2, a1, .LBB1_2 +; RV64-NEXT: # %bb.1: # %cond.false +; RV64-NEXT: not a0, a0 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: sub a0, a0, a1 +; 
RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: j .LBB1_3 +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: .LBB1_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %neg = xor i16 %x, -1 + %res = call i16 @llvm.ctlz.i16(i16 %neg, i1 false) + ret i16 %res +} + +define i32 @clo32(i32 %x) { +; RV32-LABEL: clo32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: beq a0, a1, .LBB2_2 +; RV32-NEXT: # %bb.1: # %cond.false +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: j .LBB2_3 +; RV32-NEXT: .LBB2_2: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: .LBB2_3: # %cond.end +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: clo32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a1, a0 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a1, a2, .LBB2_2 +; RV64-NEXT: # %bb.1: # %cond.false +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srliw a1, a0, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: 
lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -32 +; RV64-NEXT: j .LBB2_3 +; RV64-NEXT: .LBB2_2: +; RV64-NEXT: addi a0, zero, 32 +; RV64-NEXT: .LBB2_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %neg = xor i32 %x, -1 + %res = call i32 @llvm.ctlz.i32(i32 %neg, i1 false) + ret i32 %res +} + +define i32 @clov4i8(i32 %x, i1) { +; RV32-LABEL: clov4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: .cfi_offset s3, -20 +; RV32-NEXT: .cfi_offset s4, -24 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lbu a0, 4(sp) +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s2, a2, 1365 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s0, a1, 819 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s4, a1, -241 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi s3, a1, 257 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s1, zero +; RV32-NEXT: lbu a1, 5(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: 
srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 6(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 7(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 3 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: clov4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or 
a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s0, a1, 819 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s4, a1, -241 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s3, a1, 257 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: mv s1, zero +; RV64-NEXT: lbu a1, 9(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 10(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 1 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 11(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 2 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub 
a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 12(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 3 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 13(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 4 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 14(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 5 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 15(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 6 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; 
RV64-NEXT: mv a1, s3
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -56
+; RV64-NEXT: insb s1, a0, 7
+; RV64-NEXT: mv a0, s1
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+  %a = bitcast i32 %x to <4 x i8>
+  %neg = xor <4 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1>
+  %b = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %neg, i1 false)
+  %c = bitcast <4 x i8> %b to i32
+  ret i32 %c
+}
+
+define i32 @clov2i16(i32 %x, i1) {
+; RV32-LABEL: clov2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: lhu a0, 4(sp)
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: addi s2, a2, 1365
+; RV32-NEXT: and a1, a1, s2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi s1, a1, 819
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi s3, a1, -241
+; RV32-NEXT: and a0, a0, s3
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi s0, a1, 257
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: lhu a1, 6(sp)
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pktb16 s4, zero, a0
+; RV32-NEXT: srli a0, a1, 1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s2
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, s3
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pkbb16 a0, a0, s4
+; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi
sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: clov2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: .cfi_offset s5, -56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a0, 0(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s1, a1, 819 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s3, a1, -241 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s4, a1, 257 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: lhu a1, 2(sp) +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: pktb16 a0, zero, a0 +; RV64-NEXT: pktb32 s0, zero, a0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: lhu a1, 4(sp) +; RV64-NEXT: pkbb16 a0, a0, s0 +; RV64-NEXT: pktb32 s0, s0, a0 +; RV64-NEXT: pkbt32 s5, s0, s0 +; RV64-NEXT: srli a0, 
a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: lhu a1, 6(sp)
+; RV64-NEXT: pktb16 a0, s5, a0
+; RV64-NEXT: pkbb32 s0, a0, s0
+; RV64-NEXT: pkbt32 s5, s0, s0
+; RV64-NEXT: srli a0, a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: pkbb16 a0, a0, s5
+; RV64-NEXT: pkbb32 a0, a0, s0
+; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+  %a = bitcast i32 %x to <2 x i16>
+  %neg = xor <2 x i16> %a, <i16 -1, i16 -1>
+  %b = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %neg, i1 false)
+  %c = bitcast <2 x i16> %b to i32
+  ret i32 %c
+}
+
+define i64 @clov8i8(i64 %x, i1) {
+; RV32-LABEL: clov8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: .cfi_offset s5, -28
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: lbu a0, 16(sp)
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+;
RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s3, a2, 1365 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s0, a1, 819 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s5, a1, -241 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi s2, a1, 257 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s4, zero +; RV32-NEXT: lbu a1, 17(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 18(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 19(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 3 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: 
srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s1, zero +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 3 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s4 +; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: clov8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: 
.cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s0, a1, 819 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s4, a1, -241 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s3, a1, 257 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: mv s1, zero +; RV64-NEXT: lbu a1, 9(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 10(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 1 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 
11(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 2 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 12(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 3 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 13(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 4 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 14(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 5 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 15(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 6 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; 
RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s0
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s4
+; RV64-NEXT: mv a1, s3
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -56
+; RV64-NEXT: insb s1, a0, 7
+; RV64-NEXT: mv a0, s1
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+  %a = bitcast i64 %x to <8 x i8>
+  %neg = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %b = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %neg, i1 false)
+  %c = bitcast <8 x i8> %b to i64
+  ret i64 %c
+}
+
+define i64 @clov4i16(i64 %x, i1) {
+; RV32-LABEL: clov4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: .cfi_offset s5, -28
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: lhu a0, 16(sp)
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: addi s3, a2, 1365
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi s1, a1, 819
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi s4, a1, -241
+; RV32-NEXT: and a0, a0, s4
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi s0, a1, 257
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: lhu a1, 18(sp)
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pktb16 s2, zero, a0
+; RV32-NEXT: srli a0, a1, 1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub
a0, a0, a1 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lhu a1, 12(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: pkbb16 s2, a0, s2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lhu a1, 14(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: pktb16 s5, zero, a0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: pkbb16 a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: clov4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: .cfi_offset s5, -56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a0, 0(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: 
srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s1, a1, 819 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s3, a1, -241 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s4, a1, 257 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: lhu a1, 2(sp) +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: pktb16 a0, zero, a0 +; RV64-NEXT: pktb32 s0, zero, a0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: lhu a1, 4(sp) +; RV64-NEXT: pkbb16 a0, a0, s0 +; RV64-NEXT: pktb32 s0, s0, a0 +; RV64-NEXT: pkbt32 s5, s0, s0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: lhu a1, 6(sp) +; RV64-NEXT: pktb16 a0, s5, a0 +; RV64-NEXT: pkbb32 s0, a0, s0 +; RV64-NEXT: pkbt32 s5, s0, s0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; 
RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: pkbb16 a0, a0, s5 +; RV64-NEXT: pkbb32 a0, a0, s0 +; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %neg = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> + %b = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %neg, i1 false) + %c = bitcast <4 x i16> %b to i64 + ret i64 %c +} + +define i64 @clov2i32(i64 %x, i1) { +; RV32-LABEL: clov2i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: .cfi_offset s3, -20 +; RV32-NEXT: .cfi_offset s4, -24 +; RV32-NEXT: .cfi_offset s5, -28 +; RV32-NEXT: not s3, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s4, a2, 1365 +; RV32-NEXT: and a1, a1, s4 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s1, a1, 819 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s5, a1, -241 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi s0, a1, 257 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli s2, a0, 24 +; RV32-NEXT: srli a0, s3, 1 +; RV32-NEXT: or a0, s3, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s4 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a1, a0, 24 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +;
RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: clov2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lwu a0, 8(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s1, a1, 819 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s3, a1, -241 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s0, a1, 257 +; RV64-NEXT: mv a1, s0 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lwu a1, 12(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -32 +; RV64-NEXT: pktb32 s4, zero, a0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s0 +; RV64-NEXT: call __muldi3@plt +; 
RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -32 +; RV64-NEXT: pkbb32 a0, a0, s4 +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = bitcast i64 %x to <2 x i32> + %neg = xor <2 x i32> %a, <i32 -1, i32 -1> + %b = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %neg, i1 false) + %c = bitcast <2 x i32> %b to i64 + ret i64 %c +} + +declare i8 @llvm.ctlz.i8(i8, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1) +declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1) +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) diff --git a/llvm/test/CodeGen/RISCV/rvp/clz.ll b/llvm/test/CodeGen/RISCV/rvp/clz.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/clz.ll @@ -0,0 +1,2205 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i8 @clz8(i8 %x) { +; RV32-LABEL: clz8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: andi a0, a0, 255 +; RV32-NEXT: beqz a0, .LBB0_2 +; RV32-NEXT: # %bb.1: # %cond.false +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: .LBB0_2: +; RV32-NEXT: addi a0, zero, 8 +; RV32-NEXT: .LBB0_3: # %cond.end +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: clz8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: andi a0, a0, 255 +; RV64-NEXT: beqz a0, .LBB0_2 +; RV64-NEXT: # %bb.1: # %cond.false +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1,
a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: j .LBB0_3 +; RV64-NEXT: .LBB0_2: +; RV64-NEXT: addi a0, zero, 8 +; RV64-NEXT: .LBB0_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %res = call i8 @llvm.ctlz.i8(i8 %x, i1 false) + ret i8 %res +} + +define i16 @clz16(i16 %x) { +; RV32-LABEL: clz16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: beqz a0, .LBB1_2 +; RV32-NEXT: # %bb.1: # %cond.false +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: j .LBB1_3 +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: addi a0, zero, 16 +; RV32-NEXT: .LBB1_3: # %cond.end +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: clz16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addiw a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: beqz a0, .LBB1_2 +; RV64-NEXT: # %bb.1: # %cond.false +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, 
a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: j .LBB1_3 +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: .LBB1_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %res = call i16 @llvm.ctlz.i16(i16 %x, i1 false) + ret i16 %res +} + +define i32 @clz32(i32 %x) { +; RV32-LABEL: clz32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: beqz a0, .LBB2_2 +; RV32-NEXT: # %bb.1: # %cond.false +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: and a2, a0, a1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: j .LBB2_3 +; RV32-NEXT: .LBB2_2: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: .LBB2_3: # %cond.end +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: clz32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a1, a0 +; RV64-NEXT: beqz a1, .LBB2_2 +; 
RV64-NEXT: # %bb.1: # %cond.false +; RV64-NEXT: srliw a1, a0, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: and a2, a0, a1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -32 +; RV64-NEXT: j .LBB2_3 +; RV64-NEXT: .LBB2_2: +; RV64-NEXT: addi a0, zero, 32 +; RV64-NEXT: .LBB2_3: # %cond.end +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %res = call i32 @llvm.ctlz.i32(i32 %x, i1 false) + ret i32 %res +} + +define i32 @clzv4i8(i32 %x, i1) { +; RV32-LABEL: clzv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: .cfi_offset s3, -20 +; RV32-NEXT: .cfi_offset s4, -24 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lbu a0, 4(sp) +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s2, a2, 1365 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s0, a1, 819 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s4, a1, -241 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: lui a1, 4112 
+; RV32-NEXT: addi s3, a1, 257 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s1, zero +; RV32-NEXT: lbu a1, 5(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 6(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 7(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s4 +; RV32-NEXT: mv a1, s3 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 3 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: clzv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: sd a0, 8(sp) +; 
RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s0, a1, 819 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s4, a1, -241 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s3, a1, 257 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: mv s1, zero +; RV64-NEXT: lbu a1, 9(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 10(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 1 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 11(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 2 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 
+; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 12(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 3 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 13(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 4 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 14(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 5 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 15(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 6 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli 
a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 7 +; RV64-NEXT: mv a0, s1 +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = bitcast i32 %x to <4 x i8> + %b = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %a, i1 false) + %c = bitcast <4 x i8> %b to i32 + ret i32 %c +} + +define i32 @clzv2i16(i32 %x, i1) { +; RV32-LABEL: clzv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: .cfi_offset s3, -20 +; RV32-NEXT: .cfi_offset s4, -24 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lhu a0, 4(sp) +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s2, a2, 1365 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s1, a1, 819 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s3, a1, -241 +; RV32-NEXT: and a0, a0, s3 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi s0, a1, 257 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lhu a1, 6(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: pktb16 s4, zero, a0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s2 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s1 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s3 +; RV32-NEXT: mv a1, s0 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -16 +; RV32-NEXT: pkbb16 a0, a0, s4 +; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; 
RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: clzv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: .cfi_offset s5, -56 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a0, 0(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s1, a1, 819 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s3, a1, -241 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s4, a1, 257 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: lhu a1, 2(sp) +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: pktb16 a0, zero, a0 +; RV64-NEXT: pktb32 s0, zero, a0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call 
__muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: lhu a1, 4(sp) +; RV64-NEXT: pkbb16 a0, a0, s0 +; RV64-NEXT: pktb32 s0, s0, a0 +; RV64-NEXT: pkbt32 s5, s0, s0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: lhu a1, 6(sp) +; RV64-NEXT: pktb16 a0, s5, a0 +; RV64-NEXT: pkbb32 s0, a0, s0 +; RV64-NEXT: pkbt32 s5, s0, s0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s1 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s3 +; RV64-NEXT: mv a1, s4 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -48 +; RV64-NEXT: pkbb16 a0, a0, s5 +; RV64-NEXT: pkbb32 a0, a0, s0 +; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = bitcast i32 %x to <2 x i16> + %b = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %a, i1 false) + %c = bitcast <2 x i16> %b to i32 + ret i32 %c +} + +define i64 @clzv8i8(i64 %x, i1) { +; RV32-LABEL: clzv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: .cfi_offset s3, -20 +; RV32-NEXT: .cfi_offset s4, -24 +; RV32-NEXT: .cfi_offset s5, -28 +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lbu a0, 16(sp) +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: 
srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi s3, a2, 1365 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi s0, a1, 819 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi s5, a1, -241 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi s2, a1, 257 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s4, zero +; RV32-NEXT: lbu a1, 17(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 18(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 19(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s4, a0, 3 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 
+; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: mv s1, zero +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 0 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 1 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 2 +; RV32-NEXT: srli a0, a1, 1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srli a1, a0, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: not a0, a0 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: and a1, a1, s3 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: and a0, a0, s5 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: call __mulsi3@plt +; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: addi a0, a0, -24 +; RV32-NEXT: insb s1, a0, 3 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a1, s4 +; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: clzv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: 
.cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: .cfi_offset s3, -40 +; RV64-NEXT: .cfi_offset s4, -48 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: lui a2, 21845 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi a2, a2, 1365 +; RV64-NEXT: slli a2, a2, 12 +; RV64-NEXT: addi s2, a2, 1365 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s0, a1, 819 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: lui a1, 3855 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 241 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi s4, a1, -241 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 257 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi s3, a1, 257 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: mv s1, zero +; RV64-NEXT: lbu a1, 9(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 0 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 10(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 1 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call 
__muldi3@plt +; RV64-NEXT: lbu a1, 11(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 2 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 12(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 3 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 13(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 4 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 14(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 5 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 8 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 16 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: srli a1, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: and a1, a1, s2 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: and a0, a0, s4 +; RV64-NEXT: mv a1, s3 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: lbu a1, 15(sp) +; RV64-NEXT: srli a0, a0, 56 +; RV64-NEXT: addi a0, a0, -56 +; RV64-NEXT: insb s1, a0, 6 +; RV64-NEXT: srli a0, a1, 1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli a1, a0, 2 
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s0
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s0
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s4
+; RV64-NEXT: mv a1, s3
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -56
+; RV64-NEXT: insb s1, a0, 7
+; RV64-NEXT: mv a0, s1
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %a = bitcast i64 %x to <8 x i8>
+ %b = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
+ %c = bitcast <8 x i8> %b to i64
+ ret i64 %c
+}
+
+define i64 @clzv4i16(i64 %x, i1) {
+; RV32-LABEL: clzv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: .cfi_offset s5, -28
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: lhu a0, 16(sp)
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: addi s3, a2, 1365
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi s1, a1, 819
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi s4, a1, -241
+; RV32-NEXT: and a0, a0, s4
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi s0, a1, 257
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: lhu a1, 18(sp)
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pktb16 s2, zero, a0
+; RV32-NEXT: srli a0, a1, 1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, s4
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: lhu a1, 12(sp)
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pkbb16 s2, a0, s2
+; RV32-NEXT: srli a0, a1, 1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, s4
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: lhu a1, 14(sp)
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pktb16 s5, zero, a0
+; RV32-NEXT: srli a0, a1, 1
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s3
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: and a1, a0, s1
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, s4
+; RV32-NEXT: mv a1, s0
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: srli a0, a0, 24
+; RV32-NEXT: addi a0, a0, -16
+; RV32-NEXT: pkbb16 a0, a0, s5
+; RV32-NEXT: mv a1, s2
+; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: ret
+;
+; RV64-LABEL: clzv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: .cfi_offset s1, -24
+; RV64-NEXT: .cfi_offset s2, -32
+; RV64-NEXT: .cfi_offset s3, -40
+; RV64-NEXT: .cfi_offset s4, -48
+; RV64-NEXT: .cfi_offset s5, -56
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a0, 0(sp)
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: lui a2, 21845
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi s2, a2, 1365
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: lui a1, 13107
+; RV64-NEXT: addiw a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi s1, a1, 819
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: lui a1, 3855
+; RV64-NEXT: addiw a1, a1, 241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi s3, a1, -241
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: lui a1, 4112
+; RV64-NEXT: addiw a1, a1, 257
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 257
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi s4, a1, 257
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: lhu a1, 2(sp)
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: pktb16 a0, zero, a0
+; RV64-NEXT: pktb32 s0, zero, a0
+; RV64-NEXT: srli a0, a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: lhu a1, 4(sp)
+; RV64-NEXT: pkbb16 a0, a0, s0
+; RV64-NEXT: pktb32 s0, s0, a0
+; RV64-NEXT: pkbt32 s5, s0, s0
+; RV64-NEXT: srli a0, a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: lhu a1, 6(sp)
+; RV64-NEXT: pktb16 a0, s5, a0
+; RV64-NEXT: pkbb32 s0, a0, s0
+; RV64-NEXT: pkbt32 s5, s0, s0
+; RV64-NEXT: srli a0, a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s4
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -48
+; RV64-NEXT: pkbb16 a0, a0, s5
+; RV64-NEXT: pkbb32 a0, a0, s0
+; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %a = bitcast i64 %x to <4 x i16>
+ %b = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
+ %c = bitcast <4 x i16> %b to i64
+ ret i64 %c
+}
+
+define i64 @clzv2i32(i64 %x, i1) {
+; RV32-LABEL: clzv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
+; RV32-NEXT: .cfi_offset s2, -16
+; RV32-NEXT: .cfi_offset s3, -20
+; RV32-NEXT: .cfi_offset s4, -24
+; RV32-NEXT: .cfi_offset s5, -28
+; RV32-NEXT: mv s3, a1
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: addi s4, a2, 1365
+; RV32-NEXT: and a1, a1, s4
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi s0, a1, 819
+; RV32-NEXT: and a1, a0, s0
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi s5, a1, -241
+; RV32-NEXT: and a0, a0, s5
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi s1, a1, 257
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: srli s2, a0, 24
+; RV32-NEXT: srli a0, s3, 1
+; RV32-NEXT: or a0, s3, a0
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 8
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: srli a1, a0, 16
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: srli a1, a0, 1
+; RV32-NEXT: and a1, a1, s4
+; RV32-NEXT: sub a0, a0, a1
+; RV32-NEXT: and a1, a0, s0
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: and a0, a0, s0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: srli a1, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: and a0, a0, s5
+; RV32-NEXT: mv a1, s1
+; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: srli a1, a0, 24
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: clzv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -64
+; RV64-NEXT: .cfi_def_cfa_offset 64
+; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: .cfi_offset s1, -24
+; RV64-NEXT: .cfi_offset s2, -32
+; RV64-NEXT: .cfi_offset s3, -40
+; RV64-NEXT: .cfi_offset s4, -48
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lwu a0, 8(sp)
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: lui a2, 21845
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi a2, a2, 1365
+; RV64-NEXT: slli a2, a2, 12
+; RV64-NEXT: addi s2, a2, 1365
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: lui a1, 13107
+; RV64-NEXT: addiw a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi s1, a1, 819
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: lui a1, 3855
+; RV64-NEXT: addiw a1, a1, 241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 241
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi s3, a1, -241
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: lui a1, 4112
+; RV64-NEXT: addiw a1, a1, 257
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 257
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi s0, a1, 257
+; RV64-NEXT: mv a1, s0
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: lwu a1, 12(sp)
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -32
+; RV64-NEXT: pktb32 s4, zero, a0
+; RV64-NEXT: srli a0, a1, 1
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: srli a1, a0, 2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 8
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 16
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: srli a1, a0, 32
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: srli a1, a0, 1
+; RV64-NEXT: and a1, a1, s2
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: and a1, a0, s1
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: and a0, a0, s1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: srli a1, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: and a0, a0, s3
+; RV64-NEXT: mv a1, s0
+; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: srli a0, a0, 56
+; RV64-NEXT: addi a0, a0, -32
+; RV64-NEXT: pkbb32 a0, a0, s4
+; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 64
+; RV64-NEXT: ret
+ %a = bitcast i64 %x to <2 x i32>
+ %b = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ %c = bitcast <2 x i32> %b to i64
+ ret i64 %c
+}
+
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1)
+declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1)
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)