diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -828,6 +828,8 @@ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Legal); + // Promote load and store operations. setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType(ISD::LOAD, VT, PromotedBitwiseVT); @@ -842,6 +844,9 @@ } else { addTypeForP(MVT::v4i8, MVT::i32); addTypeForP(MVT::v2i16, MVT::i32); + + // clo32 and clz32 support i32 type on RV32. + setOperationAction(ISD::CTLZ, MVT::i32, Legal); } setOperationAction(ISD::BSWAP, XLenVT, Legal); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -39,6 +39,10 @@ def riscv_pktt32 : SDNode<"RISCVISD::PKTT32", SDT_RISCVIntBinOp>; def riscv_pktb32 : SDNode<"RISCVISD::PKTB32", SDT_RISCVIntBinOp>; +def rvp_vnot : PatFrags<(ops node:$in), + [(xor node:$in, (bitconvert (XLenVT -1))), + (xor node:$in, -1)]>; + def uimm3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<3>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<3>; let DecoderMethod = "decodeUImmOperand<3>"; @@ -1347,3 +1351,13 @@ def : Pat<(VT (vselect GPR:$rc, GPR:$rs1, GPR:$rs2)), (BPICK GPR:$rs1, GPR:$rs2, GPR:$rc)>; } // Predicates = [HasStdExtZpn] + +// CTLZ +let Predicates = [HasStdExtZpn] in { +def : Pat<(XVEI8VT (ctlz XVEI8VT:$rs1)), (CLZ8 GPR:$rs1)>; +def : Pat<(XVEI16VT (ctlz XVEI16VT:$rs1)), (CLZ16 GPR:$rs1)>; +def : Pat<(XVEI32VT (ctlz XVEI32VT:$rs1)), (CLZ32 GPR:$rs1)>; +def : Pat<(XVEI8VT (ctlz (rvp_vnot XVEI8VT:$rs1))), (CLO8 GPR:$rs1)>; +def : Pat<(XVEI16VT (ctlz (rvp_vnot XVEI16VT:$rs1))), (CLO16 GPR:$rs1)>; +def : Pat<(XVEI32VT (ctlz (rvp_vnot XVEI32VT:$rs1))), (CLO32 GPR:$rs1)>; +} // Predicates = [HasStdExtZpn] diff --git 
a/llvm/test/CodeGen/RISCV/rvp/clo.ll b/llvm/test/CodeGen/RISCV/rvp/clo.ll --- a/llvm/test/CodeGen/RISCV/rvp/clo.ll +++ b/llvm/test/CodeGen/RISCV/rvp/clo.ll @@ -4,57 +4,22 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=RV64 +; TODO: ctlz with i8, i16 and i32 types could be optimized to clo8, clo16 and clo32. + define i8 @clo8(i8 %x) { ; RV32-LABEL: clo8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: andi a1, a0, 255 ; RV32-NEXT: addi a2, zero, 255 ; RV32-NEXT: beq a1, a2, .LBB0_2 ; RV32-NEXT: # %bb.1: # %cond.false ; RV32-NEXT: not a0, a0 ; RV32-NEXT: andi a0, a0, 255 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: clz32 a0, a0 ; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: ret ; RV32-NEXT: .LBB0_2: ; RV32-NEXT: addi a0, zero, 8 -; RV32-NEXT: .LBB0_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clo8: @@ 
-140,10 +105,6 @@ define i16 @clo16(i16 %x) { ; RV32-LABEL: clo16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a2, a0, a1 @@ -151,44 +112,11 @@ ; RV32-NEXT: # %bb.1: # %cond.false ; RV32-NEXT: not a0, a0 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: clz32 a0, a0 ; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: j .LBB1_3 +; RV32-NEXT: ret ; RV32-NEXT: .LBB1_2: ; RV32-NEXT: addi a0, zero, 16 -; RV32-NEXT: .LBB1_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clo16: @@ -275,51 +203,14 @@ define i32 @clo32(i32 %x) { ; RV32-LABEL: clo32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a1, zero, -1 ; RV32-NEXT: beq a0, a1, .LBB2_2 ; RV32-NEXT: # %bb.1: # %cond.false ; RV32-NEXT: not a0, a0 
-; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: j .LBB2_3 +; RV32-NEXT: clz32 a0, a0 +; RV32-NEXT: ret ; RV32-NEXT: .LBB2_2: ; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: .LBB2_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clo32: @@ -406,440 +297,12 @@ define i32 @clov4i8(i32 %x, i1) { ; RV32-LABEL: clov4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: xor a0, a0, a1 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lbu a0, 4(sp) -; RV32-NEXT: srli a1, a0, 1 -; 
RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s2, a2, 1365 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s0, a1, 819 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s4, a1, -241 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s3, a1, 257 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s1, zero -; RV32-NEXT: lbu a1, 5(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 6(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or 
a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 7(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 3 -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clo8 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clov4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 
8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lbu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s0, a1, 819 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s4, a1, -241 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: 
addi s3, a1, 257 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: mv s1, zero -; RV64-NEXT: lbu a1, 9(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 10(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 1 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 11(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 2 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 
-; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 12(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 3 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 13(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 4 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub 
a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 14(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 5 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 15(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 6 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 
-; RV64-NEXT: insb s1, a0, 7 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clo8 a0, a0 ; RV64-NEXT: ret %a = bitcast i32 %x to <4 x i8> %neg = xor <4 x i8> %a, @@ -851,275 +314,12 @@ define i32 @clov2i16(i32 %x, i1) { ; RV32-LABEL: clov2i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: xor a0, a0, a1 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lhu a0, 4(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s2, a2, 1365 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s1, a1, 819 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; 
RV32-NEXT: addi s3, a1, -241 -; RV32-NEXT: and a0, a0, s3 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s0, a1, 257 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 6(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s4, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s3 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 a0, a0, s4 -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clo16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clov2i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: 
.cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: .cfi_offset s5, -56 -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lhu a0, 0(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s4, a1, 257 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: lhu a1, 2(sp) -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pktb16 a0, zero, a0 -; 
RV64-NEXT: pktb32 s0, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 4(sp) -; RV64-NEXT: pkbb16 a0, a0, s0 -; RV64-NEXT: pktb32 s0, s0, a0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 6(sp) -; RV64-NEXT: pktb16 a0, s5, a0 -; RV64-NEXT: pkbb32 s0, a0, s0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, 
a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pkbb16 a0, a0, s5 -; RV64-NEXT: pkbb32 a0, a0, s0 -; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clo16 a0, a0 ; RV64-NEXT: ret %a = bitcast i32 %x to <2 x i16> %neg = xor <2 x i16> %a, @@ -1131,555 +331,13 @@ define i64 @clov8i8(i64 %x, i1) { ; RV32-LABEL: clov8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: xor a1, a1, 
a2 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lbu a0, 16(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s3, a2, 1365 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s0, a1, 819 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s5, a1, -241 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s2, a1, 257 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s4, zero -; RV32-NEXT: lbu a1, 17(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 18(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; 
RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 19(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 12(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 3 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, 
a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s1, zero -; RV32-NEXT: lbu a1, 13(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 14(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 15(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; 
RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 3 -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: mv a1, s4 -; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: clo8 a1, a1 +; RV32-NEXT: clo8 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clov8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lbu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, 
a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s0, a1, 819 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s4, a1, -241 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s3, a1, 257 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: mv s1, zero -; RV64-NEXT: lbu a1, 9(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, 
a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 10(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 1 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 11(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 2 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 12(sp) -; 
RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 3 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 13(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 4 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 14(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 5 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: 
srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 15(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 6 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 7 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clo8 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <8 x i8> %neg = xor <8 x i8> %a, @@ -1691,335 +349,13 @@ define i64 @clov4i16(i64 %x, i1) { ; RV32-LABEL: clov4i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; 
RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lhu a0, 16(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s3, a2, 1365 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s1, a1, 819 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s4, a1, -241 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s0, a1, 257 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 18(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s2, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: 
srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 12(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 s2, a0, s2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 14(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s5, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 
-; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: clo16 a1, a1 +; RV32-NEXT: clo16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clov4i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: .cfi_offset s5, -56 -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lhu a0, 0(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 
1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s4, a1, 257 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: lhu a1, 2(sp) -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pktb16 a0, zero, a0 -; RV64-NEXT: pktb32 s0, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: 
lhu a1, 4(sp) -; RV64-NEXT: pkbb16 a0, a0, s0 -; RV64-NEXT: pktb32 s0, s0, a0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 6(sp) -; RV64-NEXT: pktb16 a0, s5, a0 -; RV64-NEXT: pkbb32 s0, a0, s0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pkbb16 a0, a0, s5 -; RV64-NEXT: pkbb32 a0, a0, s0 -; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded 
Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clo16 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %neg = xor <4 x i16> %a, @@ -2031,205 +367,13 @@ define i64 @clov2i32(i64 %x, i1) { ; RV32-LABEL: clov2i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: not s3, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s4, a2, 1365 -; RV32-NEXT: and a1, a1, s4 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s1, a1, 819 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s5, a1, -241 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s0, a1, 257 -; RV32-NEXT: 
mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli s2, a0, 24 -; RV32-NEXT: srli a0, s3, 1 -; RV32-NEXT: or a0, s3, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s4 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a1, a0, 24 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clo32 a0, a0 +; RV32-NEXT: clo32 a1, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: clov2i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: xor a0, a0, a1 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lwu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 
-; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s0, a1, 257 -; RV64-NEXT: mv a1, s0 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lwu a1, 12(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -32 -; RV64-NEXT: pktb32 s4, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or 
a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s0 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -32 -; RV64-NEXT: pkbb32 a0, a0, s4 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clo32 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <2 x i32> %neg = xor <2 x i32> %a, diff --git a/llvm/test/CodeGen/RISCV/rvp/clz.ll b/llvm/test/CodeGen/RISCV/rvp/clz.ll --- a/llvm/test/CodeGen/RISCV/rvp/clz.ll +++ b/llvm/test/CodeGen/RISCV/rvp/clz.ll @@ -4,54 +4,19 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=RV64 +; TODO: cltz with i8, i16 and i32 types could be optimized to clz8, clz16 and clz32. 
+ define i8 @clz8(i8 %x) { ; RV32-LABEL: clz8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: beqz a0, .LBB0_2 ; RV32-NEXT: # %bb.1: # %cond.false -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: clz32 a0, a0 ; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: ret ; RV32-NEXT: .LBB0_2: ; RV32-NEXT: addi a0, zero, 8 -; RV32-NEXT: .LBB0_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clz8: @@ -133,53 +98,16 @@ define i16 @clz16(i16 %x) { ; RV32-LABEL: clz16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: beqz a0, .LBB1_2 ; RV32-NEXT: # %bb.1: # %cond.false -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; 
RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 +; RV32-NEXT: clz32 a0, a0 ; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: j .LBB1_3 +; RV32-NEXT: ret ; RV32-NEXT: .LBB1_2: ; RV32-NEXT: addi a0, zero, 16 -; RV32-NEXT: .LBB1_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clz16: @@ -263,49 +191,12 @@ define i32 @clz32(i32 %x) { ; RV32-LABEL: clz32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: beqz a0, .LBB2_2 ; RV32-NEXT: # %bb.1: # %cond.false -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: and a2, a0, a1 -; RV32-NEXT: srli a0, 
a0, 2 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: add a0, a2, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: j .LBB2_3 +; RV32-NEXT: clz32 a0, a0 +; RV32-NEXT: ret ; RV32-NEXT: .LBB2_2: ; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: .LBB2_3: # %cond.end -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: clz32: @@ -389,436 +280,12 @@ define i32 @clzv4i8(i32 %x, i1) { ; RV32-LABEL: clzv4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lbu a0, 4(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s2, a2, 1365 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s0, a1, 819 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; 
RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s4, a1, -241 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s3, a1, 257 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s1, zero -; RV32-NEXT: lbu a1, 5(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 6(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 7(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli 
a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s3 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 3 -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clz8 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clzv4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lbu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 
-; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s0, a1, 819 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s4, a1, -241 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s3, a1, 257 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: mv s1, zero -; RV64-NEXT: lbu a1, 9(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, 
s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 10(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 1 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 11(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 2 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 12(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; 
RV64-NEXT: insb s1, a0, 3 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 13(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 4 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 14(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 5 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli 
a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 15(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 6 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 7 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clz8 a0, a0 ; RV64-NEXT: ret %a = bitcast i32 %x to <4 x i8> %b = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %a, i1 false) @@ -829,271 +296,12 @@ define i32 @clzv2i16(i32 %x, i1) { ; RV32-LABEL: clzv2i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; 
RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lhu a0, 4(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s2, a2, 1365 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s1, a1, 819 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s3, a1, -241 -; RV32-NEXT: and a0, a0, s3 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s0, a1, 257 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 6(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s4, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s2 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 
-; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s3 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 a0, a0, s4 -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clz16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clzv2i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: .cfi_offset s5, -56 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lhu a0, 0(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, 
a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s4, a1, 257 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: lhu a1, 2(sp) -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pktb16 a0, zero, a0 -; RV64-NEXT: pktb32 s0, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; 
RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 4(sp) -; RV64-NEXT: pkbb16 a0, a0, s0 -; RV64-NEXT: pktb32 s0, s0, a0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 6(sp) -; RV64-NEXT: pktb16 a0, s5, a0 -; RV64-NEXT: pkbb32 s0, a0, s0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pkbb16 a0, a0, s5 -; RV64-NEXT: pkbb32 a0, a0, s0 -; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; 
RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clz16 a0, a0 ; RV64-NEXT: ret %a = bitcast i32 %x to <2 x i16> %b = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %a, i1 false) @@ -1104,550 +312,13 @@ define i64 @clzv8i8(i64 %x, i1) { ; RV32-LABEL: clzv8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lbu a0, 16(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s3, a2, 1365 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s0, a1, 819 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi 
s5, a1, -241 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s2, a1, 257 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s4, zero -; RV32-NEXT: lbu a1, 17(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 18(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 19(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; 
RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 12(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s4, a0, 3 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: mv s1, zero -; RV32-NEXT: lbu a1, 13(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, 
a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 14(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 1 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lbu a1, 15(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -24 -; RV32-NEXT: insb s1, a0, 3 -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: mv a1, s4 -; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: 
lw s1, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: clz8 a1, a1 +; RV32-NEXT: clz8 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clzv8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lbu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s0, a1, 819 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; 
RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s4, a1, -241 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s3, a1, 257 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: mv s1, zero -; RV64-NEXT: lbu a1, 9(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 10(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 1 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 
-; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 11(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 2 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 12(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 3 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 13(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; 
RV64-NEXT: insb s1, a0, 4 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 14(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 5 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lbu a1, 15(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 6 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli 
a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s0 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s0 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s4 -; RV64-NEXT: mv a1, s3 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -56 -; RV64-NEXT: insb s1, a0, 7 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clz8 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <8 x i8> %b = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) @@ -1658,330 +329,13 @@ define i64 @clzv4i16(i64 %x, i1) { ; RV32-LABEL: clzv4i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lhu a0, 16(sp) -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, 
a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s3, a2, 1365 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s1, a1, 819 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s4, a1, -241 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s0, a1, 257 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 18(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s2, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 12(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 s2, a0, s2 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 
-; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: lhu a1, 14(sp) -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pktb16 s5, zero, a0 -; RV32-NEXT: srli a0, a1, 1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s3 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s1 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s4 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: addi a0, a0, -16 -; RV32-NEXT: pkbb16 a0, a0, s5 -; RV32-NEXT: mv a1, s2 -; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: clz16 a1, a1 +; RV32-NEXT: clz16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: clzv4i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded 
Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: .cfi_offset s5, -56 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lhu a0, 0(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s4, a1, 257 -; RV64-NEXT: mv a1, s4 -; 
RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: lhu a1, 2(sp) -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pktb16 a0, zero, a0 -; RV64-NEXT: pktb32 s0, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 4(sp) -; RV64-NEXT: pkbb16 a0, a0, s0 -; RV64-NEXT: pktb32 s0, s0, a0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: lhu a1, 6(sp) -; RV64-NEXT: pktb16 a0, s5, a0 -; RV64-NEXT: pkbb32 s0, a0, s0 -; RV64-NEXT: pkbt32 s5, s0, s0 -; RV64-NEXT: 
srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s4 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -48 -; RV64-NEXT: pkbb16 a0, a0, s5 -; RV64-NEXT: pkbb32 a0, a0, s0 -; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clz16 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %b = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) @@ -1992,202 +346,13 @@ define i64 @clzv2i32(i64 %x, i1) { ; RV32-LABEL: clzv2i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 
-; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: mv s3, a1 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi s4, a2, 1365 -; RV32-NEXT: and a1, a1, s4 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi s0, a1, 819 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi s5, a1, -241 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi s1, a1, 257 -; RV32-NEXT: mv a1, s1 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli s2, a0, 24 -; RV32-NEXT: srli a0, s3, 1 -; RV32-NEXT: or a0, s3, a0 -; RV32-NEXT: srli a1, a0, 2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 8 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: srli a1, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: not a0, a0 -; RV32-NEXT: srli a1, a0, 1 -; RV32-NEXT: and a1, a1, s4 -; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: and a1, a0, s0 -; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: and a0, a0, s0 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: srli a1, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: and a0, a0, s5 -; RV32-NEXT: mv a1, s1 -; RV32-NEXT: call __mulsi3@plt -; RV32-NEXT: srli a1, a0, 24 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 
4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: clz32 a0, a0 +; RV32-NEXT: clz32 a1, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: clzv2i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: .cfi_offset s3, -40 -; RV64-NEXT: .cfi_offset s4, -48 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lwu a0, 8(sp) -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: lui a2, 21845 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi a2, a2, 1365 -; RV64-NEXT: slli a2, a2, 12 -; RV64-NEXT: addi s2, a2, 1365 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s1, a1, 819 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: 
add a0, a0, a1 -; RV64-NEXT: lui a1, 3855 -; RV64-NEXT: addiw a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 241 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi s3, a1, -241 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: lui a1, 4112 -; RV64-NEXT: addiw a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 257 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi s0, a1, 257 -; RV64-NEXT: mv a1, s0 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lwu a1, 12(sp) -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -32 -; RV64-NEXT: pktb32 s4, zero, a0 -; RV64-NEXT: srli a0, a1, 1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli a1, a0, 2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 8 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: srli a1, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: not a0, a0 -; RV64-NEXT: srli a1, a0, 1 -; RV64-NEXT: and a1, a1, s2 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: and a1, a0, s1 -; RV64-NEXT: srli a0, a0, 2 -; RV64-NEXT: and a0, a0, s1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: srli a1, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: and a0, a0, s3 -; RV64-NEXT: mv a1, s0 -; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: addi a0, a0, -32 -; RV64-NEXT: pkbb32 a0, a0, s4 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: clz32 a0, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <2 x i32> %b = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)