diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvp/vector-cmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-cmp.ll @@ -0,0 +1,5101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +; cmpeq8 + +define i32 @seteqv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: seteqv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lbu a0, 13(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a3, a0, .LBB0_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB0_2: +; RV32-NEXT: lbu a0, 12(sp) +; RV32-NEXT: lbu a4, 8(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: beq a4, a0, .LBB0_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB0_4: +; RV32-NEXT: lbu a4, 14(sp) +; RV32-NEXT: lbu a5, 10(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a5, a4, .LBB0_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB0_6: +; RV32-NEXT: lbu a3, 15(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: beq a4, a3, .LBB0_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB0_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: seteqv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a3, a0, .LBB0_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: beq a4, a0, .LBB0_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB0_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a5, a4, .LBB0_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB0_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB0_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB0_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB0_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB0_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: beq a4, a3, .LBB0_16 
+; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB0_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp eq <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @seteqv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: seteqv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: lbu a2, 5(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: beq a2, a1, .LBB1_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: lbu a2, 4(sp) +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: beq a2, a1, .LBB1_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB1_4: +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: lbu a2, 6(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: beq a2, a1, .LBB1_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB1_6: +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: lbu a2, 7(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: beq a2, a1, .LBB1_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB1_8: +; RV32-NEXT: lbu a2, 9(sp) +; RV32-NEXT: lbu a3, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: beq a3, a2, .LBB1_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB1_10: +; RV32-NEXT: lbu a3, 8(sp) +; RV32-NEXT: lbu a0, 0(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a0, a3, .LBB1_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB1_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 10(sp) +; RV32-NEXT: lbu a4, 2(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a4, a3, .LBB1_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB1_14: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 3(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: beq a4, a3, .LBB1_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB1_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: seteqv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a3, a0, .LBB1_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: beq a4, a0, .LBB1_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB1_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a5, a4, .LBB1_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB1_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_8: 
+; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB1_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB1_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB1_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB1_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: beq a4, a3, .LBB1_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB1_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp eq <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +define i32 @setnev4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setnev4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lbu a0, 13(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a3, a0, .LBB2_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB2_2: +; RV32-NEXT: lbu a0, 12(sp) +; RV32-NEXT: lbu a4, 8(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bne a4, a0, .LBB2_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB2_4: +; RV32-NEXT: lbu a4, 14(sp) +; RV32-NEXT: lbu a5, 10(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a5, a4, .LBB2_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB2_6: +; RV32-NEXT: lbu a3, 15(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bne a4, a3, .LBB2_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB2_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setnev4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a3, a0, .LBB2_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bne a4, a0, .LBB2_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB2_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a5, a4, .LBB2_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB2_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB2_10 +; RV64-NEXT: # %bb.9: +; 
RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB2_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB2_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB2_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bne a4, a3, .LBB2_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB2_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ne <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setnev8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setnev8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: lbu a2, 5(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bne a2, a1, .LBB3_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB3_2: +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: lbu a2, 4(sp) +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bne a2, a1, .LBB3_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB3_4: +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: lbu a2, 6(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: bne a2, a1, .LBB3_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB3_6: +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: lbu a2, 7(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bne a2, a1, .LBB3_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB3_8: +; RV32-NEXT: lbu a2, 9(sp) +; RV32-NEXT: lbu a3, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bne a3, a2, .LBB3_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB3_10: +; RV32-NEXT: lbu a3, 8(sp) +; RV32-NEXT: lbu a0, 0(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a0, a3, .LBB3_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB3_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 10(sp) +; RV32-NEXT: lbu a4, 2(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a4, a3, .LBB3_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB3_14: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 3(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bne a4, a3, .LBB3_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB3_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setnev8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a3, a0, .LBB3_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi 
a3, zero, -1 +; RV64-NEXT: bne a4, a0, .LBB3_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB3_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a5, a4, .LBB3_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB3_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB3_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB3_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB3_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB3_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bne a4, a3, .LBB3_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB3_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ne <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +; scmplt8 + +define i32 @setltv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setltv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: lb a2, 9(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: lb a3, 12(sp) +; RV32-NEXT: slt a1, a2, a1 +; RV32-NEXT: lb a2, 8(sp) +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slt a2, a2, a3 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: lb a3, 14(sp) +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: lb a2, 10(sp) +; RV32-NEXT: insb a0, a1, 1 +; RV32-NEXT: slli a1, a3, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slt a1, a2, a1 +; RV32-NEXT: lb a2, 15(sp) +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: insb a0, a1, 2 +; RV32-NEXT: slli a1, a2, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slt a1, a2, a1 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setltv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: lb a2, 1(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 0(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slli a3, a3, 
56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a2, a2, a3 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 0 +; RV64-NEXT: lb a2, 2(sp) +; RV64-NEXT: insb a0, a1, 1 +; RV64-NEXT: slli a1, a3, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 11(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: insb a0, a1, 2 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 12(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: insb a0, a1, 3 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 13(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: insb a0, a1, 4 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 14(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: insb a0, a1, 5 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 15(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: insb a0, a1, 6 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp slt <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setltv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setltv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lb a2, 13(sp) +; RV32-NEXT: lb a3, 5(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: mv a0, zero +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: lb a4, 12(sp) +; RV32-NEXT: slt a2, a3, a2 +; RV32-NEXT: lb a3, 4(sp) +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a4, a4, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a3, a3, a4 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: lb a4, 14(sp) +; RV32-NEXT: insb a1, a3, 0 +; RV32-NEXT: lb a3, 6(sp) +; RV32-NEXT: insb a1, a2, 1 +; RV32-NEXT: slli a2, a4, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a2, a3, a2 +; RV32-NEXT: lb a3, 15(sp) +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: lb a4, 7(sp) +; RV32-NEXT: insb a1, a2, 2 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a4, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a2, a3, a2 +; RV32-NEXT: lb a3, 9(sp) +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: lb a4, 1(sp) +; RV32-NEXT: insb a1, a2, 3 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a4, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: lb a4, 8(sp) +; RV32-NEXT: slt 
a2, a3, a2 +; RV32-NEXT: lb a3, 0(sp) +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a4, a4, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a3, a3, a4 +; RV32-NEXT: neg a3, a3 +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: lb a3, 2(sp) +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: slli a2, a4, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a2, a3, a2 +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: lb a4, 3(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a4, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slt a2, a3, a2 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: insb a0, a2, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setltv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: lb a2, 1(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 0(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a2, a2, a3 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 0 +; RV64-NEXT: lb a2, 2(sp) +; RV64-NEXT: insb a0, a1, 1 +; RV64-NEXT: slli a1, a3, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a2, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 11(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: insb a0, a1, 2 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 12(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: insb a0, a1, 3 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 13(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: insb a0, a1, 4 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 14(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: insb a0, a1, 5 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: lb a2, 15(sp) +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: insb a0, a1, 6 +; RV64-NEXT: slli a1, a2, 56 +; RV64-NEXT: srai a1, a1, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a2, a2, 56 +; RV64-NEXT: slt a1, a2, a1 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp slt <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +define i32 @setgtv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setgtv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lb a0, 9(sp) +; 
RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a3, a1, 24 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: blt a3, a0, .LBB6_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB6_2: +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: lb a3, 12(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a4, a3, 24 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: blt a4, a0, .LBB6_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB6_4: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: lb a3, 14(sp) +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: slli a2, a4, 24 +; RV32-NEXT: srai a4, a2, 24 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: blt a3, a4, .LBB6_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB6_6: +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: lb a4, 15(sp) +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a4, a4, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: blt a4, a3, .LBB6_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB6_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgtv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a0, .LBB6_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_2: +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: blt a4, a0, .LBB6_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB6_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 2(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a4, .LBB6_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_6: +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB6_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_8: +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: lb a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB6_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_10: +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: lb a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, 
.LBB6_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_12: +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: lb a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB6_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB6_14: +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: blt a4, a3, .LBB6_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB6_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sgt <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setgtv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setgtv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lb a0, 5(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a2, a0, 24 +; RV32-NEXT: slli a0, a1, 24 +; RV32-NEXT: srai a1, a0, 24 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: blt a1, a2, .LBB7_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB7_2: +; RV32-NEXT: lb a1, 4(sp) +; RV32-NEXT: lb a2, 12(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: blt a2, a1, .LBB7_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB7_4: +; RV32-NEXT: lb a1, 6(sp) +; RV32-NEXT: lb a2, 14(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: blt a2, a1, .LBB7_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB7_6: +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: lb a2, 15(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: blt a2, a1, .LBB7_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB7_8: +; RV32-NEXT: lb a2, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: blt a0, a2, .LBB7_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB7_10: +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: lb a2, 8(sp) +; RV32-NEXT: insb a1, a7, 1 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: blt a3, a0, .LBB7_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB7_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a1, a4, 2 +; RV32-NEXT: lb a3, 2(sp) +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: lb a2, 10(sp) +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; 
RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: blt a2, a3, .LBB7_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB7_14: +; RV32-NEXT: lb a2, 3(sp) +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: insb a1, t0, 3 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: insb a0, a4, 2 +; RV32-NEXT: blt a3, a2, .LBB7_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB7_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgtv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a0, .LBB7_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_2: +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: blt a4, a0, .LBB7_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB7_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 2(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a4, .LBB7_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_6: +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB7_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_8: +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: lb a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB7_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_10: +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: lb a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB7_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_12: +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: lb a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a4, a3, .LBB7_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB7_14: +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: blt a4, a3, .LBB7_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB7_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = 
bitcast i64 %b to <8 x i8> + %cmp = icmp sgt <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +; scmple8 + +define i32 @setlev4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setlev4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a3, a1, 24 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a0, .LBB8_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB8_2: +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: lb a3, 12(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a4, a3, 24 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bge a4, a0, .LBB8_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB8_4: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: lb a3, 14(sp) +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: slli a2, a4, 24 +; RV32-NEXT: srai a4, a2, 24 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a4, .LBB8_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB8_6: +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: lb a4, 15(sp) +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a4, a4, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bge a4, a3, .LBB8_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB8_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setlev4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a0, .LBB8_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_2: +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a0, .LBB8_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB8_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 2(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB8_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_6: +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB8_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_8: +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: lb a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi 
a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB8_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_10: +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: lb a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB8_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_12: +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: lb a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB8_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB8_14: +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bge a4, a3, .LBB8_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB8_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sle <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setlev8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setlev8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lb a0, 5(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a2, a0, 24 +; RV32-NEXT: slli a0, a1, 24 +; RV32-NEXT: srai a1, a0, 24 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bge a1, a2, .LBB9_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB9_2: +; RV32-NEXT: lb a1, 4(sp) +; RV32-NEXT: lb a2, 12(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB9_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB9_4: +; RV32-NEXT: lb a1, 6(sp) +; RV32-NEXT: lb a2, 14(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB9_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB9_6: +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: lb a2, 15(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB9_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB9_8: +; RV32-NEXT: lb a2, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bge a0, a2, .LBB9_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB9_10: +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: lb a2, 8(sp) +; RV32-NEXT: insb a1, a7, 1 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge 
a3, a0, .LBB9_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB9_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a1, a4, 2 +; RV32-NEXT: lb a3, 2(sp) +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: lb a2, 10(sp) +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: bge a2, a3, .LBB9_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB9_14: +; RV32-NEXT: lb a2, 3(sp) +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: insb a1, t0, 3 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: insb a0, a4, 2 +; RV32-NEXT: bge a3, a2, .LBB9_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB9_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setlev8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a0, .LBB9_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_2: +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: lb a3, 8(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a0, .LBB9_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB9_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 2(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB9_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_6: +; RV64-NEXT: lb a3, 3(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB9_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_8: +; RV64-NEXT: lb a3, 4(sp) +; RV64-NEXT: lb a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB9_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_10: +; RV64-NEXT: lb a3, 5(sp) +; RV64-NEXT: lb a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB9_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_12: +; RV64-NEXT: lb a3, 6(sp) +; RV64-NEXT: lb a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB9_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB9_14: +; RV64-NEXT: lb a3, 7(sp) +; RV64-NEXT: lb a4, 15(sp) 
+; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bge a4, a3, .LBB9_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB9_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sle <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +define i32 @setgev4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setgev4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 13(sp) +; RV32-NEXT: lb a1, 9(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a3, a1, 24 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a0, .LBB10_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB10_2: +; RV32-NEXT: lb a0, 12(sp) +; RV32-NEXT: lb a3, 8(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a4, a3, 24 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bge a4, a0, .LBB10_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB10_4: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lb a4, 14(sp) +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: lb a3, 10(sp) +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: slli a2, a4, 24 +; RV32-NEXT: srai a4, a2, 24 +; RV32-NEXT: slli a2, a3, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a4, .LBB10_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB10_6: +; RV32-NEXT: lb a3, 15(sp) +; RV32-NEXT: lb a4, 11(sp) +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a4, a4, 24 +; RV32-NEXT: srai a4, a4, 24 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bge a4, a3, .LBB10_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB10_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgev4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 9(sp) +; RV64-NEXT: lb a1, 1(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a0, .LBB10_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_2: +; RV64-NEXT: lb a0, 8(sp) +; RV64-NEXT: lb a3, 0(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a0, .LBB10_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB10_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 10(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 2(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB10_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_6: +; RV64-NEXT: lb a3, 11(sp) +; RV64-NEXT: lb a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; 
RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB10_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_8: +; RV64-NEXT: lb a3, 12(sp) +; RV64-NEXT: lb a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB10_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_10: +; RV64-NEXT: lb a3, 13(sp) +; RV64-NEXT: lb a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB10_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_12: +; RV64-NEXT: lb a3, 14(sp) +; RV64-NEXT: lb a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB10_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB10_14: +; RV64-NEXT: lb a3, 15(sp) +; RV64-NEXT: lb a4, 7(sp) +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bge a4, a3, .LBB10_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB10_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sge <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setgev8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setgev8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lb a0, 13(sp) +; RV32-NEXT: lb a1, 5(sp) +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a2, a0, 24 +; RV32-NEXT: slli a0, a1, 24 +; RV32-NEXT: srai a1, a0, 24 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bge a1, a2, .LBB11_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB11_2: +; RV32-NEXT: lb a1, 12(sp) +; RV32-NEXT: lb a2, 4(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB11_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB11_4: +; RV32-NEXT: lb a1, 14(sp) +; RV32-NEXT: lb a2, 6(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB11_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB11_6: +; RV32-NEXT: lb a1, 15(sp) +; RV32-NEXT: lb a2, 7(sp) +; RV32-NEXT: slli a1, a1, 24 +; RV32-NEXT: srai a1, a1, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bge a2, a1, .LBB11_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB11_8: +; RV32-NEXT: lb a2, 9(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: 
srai a0, a0, 24 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bge a0, a2, .LBB11_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB11_10: +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: lb a2, 0(sp) +; RV32-NEXT: insb a1, a7, 1 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: srai a0, a0, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a3, a2, 24 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a0, .LBB11_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB11_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a1, a4, 2 +; RV32-NEXT: lb a3, 10(sp) +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: lb a2, 2(sp) +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: addi a4, zero, -1 +; RV32-NEXT: bge a2, a3, .LBB11_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a4, zero +; RV32-NEXT: .LBB11_14: +; RV32-NEXT: lb a2, 11(sp) +; RV32-NEXT: lb a3, 3(sp) +; RV32-NEXT: insb a1, t0, 3 +; RV32-NEXT: slli a2, a2, 24 +; RV32-NEXT: srai a2, a2, 24 +; RV32-NEXT: slli a3, a3, 24 +; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: insb a0, a4, 2 +; RV32-NEXT: bge a3, a2, .LBB11_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB11_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgev8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 9(sp) +; RV64-NEXT: lb a1, 1(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a1, a1, 56 +; RV64-NEXT: srai a3, a1, 56 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a0, .LBB11_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB11_2: +; RV64-NEXT: lb a0, 8(sp) +; RV64-NEXT: lb a3, 0(sp) +; RV64-NEXT: slli a0, a0, 56 +; RV64-NEXT: srai a0, a0, 56 +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a4, a3, 56 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a0, .LBB11_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB11_4: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: lb a4, 10(sp) +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: lb a3, 2(sp) +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB11_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB11_6: +; RV64-NEXT: lb a3, 11(sp) +; RV64-NEXT: lb a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB11_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB11_8: +; RV64-NEXT: lb a3, 12(sp) +; RV64-NEXT: lb a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB11_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB11_10: +; RV64-NEXT: lb a3, 13(sp) +; RV64-NEXT: lb a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB11_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, 
zero +; RV64-NEXT: .LBB11_12: +; RV64-NEXT: lb a3, 14(sp) +; RV64-NEXT: lb a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: slli a2, a3, 56 +; RV64-NEXT: srai a3, a2, 56 +; RV64-NEXT: slli a2, a4, 56 +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a4, a3, .LBB11_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB11_14: +; RV64-NEXT: lb a3, 15(sp) +; RV64-NEXT: lb a4, 7(sp) +; RV64-NEXT: slli a3, a3, 56 +; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: slli a4, a4, 56 +; RV64-NEXT: srai a4, a4, 56 +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bge a4, a3, .LBB11_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB11_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sge <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +; ucmplt8 + +define i32 @setultv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setultv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lbu a0, 13(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a3, a0, .LBB12_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB12_2: +; RV32-NEXT: lbu a0, 12(sp) +; RV32-NEXT: lbu a4, 8(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bltu a4, a0, .LBB12_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB12_4: +; RV32-NEXT: lbu a4, 14(sp) +; RV32-NEXT: lbu a5, 10(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a5, a4, .LBB12_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB12_6: +; RV32-NEXT: lbu a3, 15(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bltu a4, a3, .LBB12_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB12_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setultv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a3, a0, .LBB12_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a4, a0, .LBB12_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB12_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB12_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB12_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB12_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; 
RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB12_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB12_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB12_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bltu a4, a3, .LBB12_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB12_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ult <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setultv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setultv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: lbu a2, 5(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB13_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB13_2: +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: lbu a2, 4(sp) +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB13_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB13_4: +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: lbu a2, 6(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB13_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB13_6: +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: lbu a2, 7(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB13_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB13_8: +; RV32-NEXT: lbu a2, 9(sp) +; RV32-NEXT: lbu a3, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bltu a3, a2, .LBB13_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB13_10: +; RV32-NEXT: lbu a3, 8(sp) +; RV32-NEXT: lbu a0, 0(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a0, a3, .LBB13_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB13_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 10(sp) +; RV32-NEXT: lbu a4, 2(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a4, a3, .LBB13_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB13_14: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 3(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bltu a4, a3, .LBB13_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB13_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setultv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a3, a0, .LBB13_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a4, a0, .LBB13_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, 
zero +; RV64-NEXT: .LBB13_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB13_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB13_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB13_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB13_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB13_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB13_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bltu a4, a3, .LBB13_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB13_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ult <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +define i32 @setugtv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setugtv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lbu a0, 9(sp) +; RV32-NEXT: lbu a3, 13(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a3, a0, .LBB14_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB14_2: +; RV32-NEXT: lbu a0, 8(sp) +; RV32-NEXT: lbu a4, 12(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bltu a4, a0, .LBB14_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB14_4: +; RV32-NEXT: lbu a4, 10(sp) +; RV32-NEXT: lbu a5, 14(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a5, a4, .LBB14_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB14_6: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 15(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bltu a4, a3, .LBB14_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB14_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setugtv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lbu a0, 1(sp) +; RV64-NEXT: lbu a3, 9(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a3, a0, .LBB14_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_2: +; RV64-NEXT: lbu a0, 0(sp) +; RV64-NEXT: lbu a4, 8(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a4, a0, .LBB14_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB14_4: +; RV64-NEXT: lbu a4, 2(sp) +; RV64-NEXT: lbu a5, 10(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 
+; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB14_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_6: +; RV64-NEXT: lbu a3, 3(sp) +; RV64-NEXT: lbu a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB14_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_8: +; RV64-NEXT: lbu a3, 4(sp) +; RV64-NEXT: lbu a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB14_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_10: +; RV64-NEXT: lbu a3, 5(sp) +; RV64-NEXT: lbu a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB14_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_12: +; RV64-NEXT: lbu a3, 6(sp) +; RV64-NEXT: lbu a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB14_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB14_14: +; RV64-NEXT: lbu a3, 7(sp) +; RV64-NEXT: lbu a4, 15(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bltu a4, a3, .LBB14_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB14_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ugt <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setugtv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setugtv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lbu a1, 5(sp) +; RV32-NEXT: lbu a2, 13(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB15_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB15_2: +; RV32-NEXT: lbu a1, 4(sp) +; RV32-NEXT: lbu a2, 12(sp) +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB15_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB15_4: +; RV32-NEXT: lbu a1, 6(sp) +; RV32-NEXT: lbu a2, 14(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB15_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB15_6: +; RV32-NEXT: lbu a1, 7(sp) +; RV32-NEXT: lbu a2, 15(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bltu a2, a1, .LBB15_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB15_8: +; RV32-NEXT: lbu a2, 1(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bltu a3, a2, .LBB15_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB15_10: +; RV32-NEXT: lbu a3, 0(sp) +; RV32-NEXT: lbu a0, 8(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a0, a3, .LBB15_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB15_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 2(sp) +; RV32-NEXT: lbu a4, 10(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bltu a4, a3, .LBB15_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB15_14: +; RV32-NEXT: lbu a3, 3(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; 
RV32-NEXT: bltu a4, a3, .LBB15_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB15_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setugtv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lbu a0, 1(sp) +; RV64-NEXT: lbu a3, 9(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a3, a0, .LBB15_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_2: +; RV64-NEXT: lbu a0, 0(sp) +; RV64-NEXT: lbu a4, 8(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a4, a0, .LBB15_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB15_4: +; RV64-NEXT: lbu a4, 2(sp) +; RV64-NEXT: lbu a5, 10(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB15_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_6: +; RV64-NEXT: lbu a3, 3(sp) +; RV64-NEXT: lbu a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB15_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_8: +; RV64-NEXT: lbu a3, 4(sp) +; RV64-NEXT: lbu a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB15_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_10: +; RV64-NEXT: lbu a3, 5(sp) +; RV64-NEXT: lbu a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB15_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_12: +; RV64-NEXT: lbu a3, 6(sp) +; RV64-NEXT: lbu a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bltu a4, a3, .LBB15_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB15_14: +; RV64-NEXT: lbu a3, 7(sp) +; RV64-NEXT: lbu a4, 15(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bltu a4, a3, .LBB15_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB15_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ugt <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +; ucmple8 + +define i32 @setulev4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setulev4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lbu a0, 9(sp) +; RV32-NEXT: lbu a3, 13(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a3, a0, .LBB16_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB16_2: +; RV32-NEXT: lbu a0, 8(sp) +; RV32-NEXT: lbu a4, 12(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bgeu a4, a0, .LBB16_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB16_4: +; RV32-NEXT: lbu a4, 10(sp) +; RV32-NEXT: lbu a5, 14(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a5, a4, .LBB16_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB16_6: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 15(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bgeu a4, a3, .LBB16_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB16_8: +; RV32-NEXT: insb 
a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setulev4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lbu a0, 1(sp) +; RV64-NEXT: lbu a3, 9(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a3, a0, .LBB16_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_2: +; RV64-NEXT: lbu a0, 0(sp) +; RV64-NEXT: lbu a4, 8(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a4, a0, .LBB16_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB16_4: +; RV64-NEXT: lbu a4, 2(sp) +; RV64-NEXT: lbu a5, 10(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB16_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_6: +; RV64-NEXT: lbu a3, 3(sp) +; RV64-NEXT: lbu a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB16_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_8: +; RV64-NEXT: lbu a3, 4(sp) +; RV64-NEXT: lbu a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB16_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_10: +; RV64-NEXT: lbu a3, 5(sp) +; RV64-NEXT: lbu a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB16_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_12: +; RV64-NEXT: lbu a3, 6(sp) +; RV64-NEXT: lbu a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB16_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB16_14: +; RV64-NEXT: lbu a3, 7(sp) +; RV64-NEXT: lbu a4, 15(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bgeu a4, a3, .LBB16_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB16_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ule <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setulev8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setulev8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lbu a1, 5(sp) +; RV32-NEXT: lbu a2, 13(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB17_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB17_2: +; RV32-NEXT: lbu a1, 4(sp) +; RV32-NEXT: lbu a2, 12(sp) +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB17_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB17_4: +; RV32-NEXT: lbu a1, 6(sp) +; RV32-NEXT: lbu a2, 14(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB17_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB17_6: +; RV32-NEXT: lbu a1, 7(sp) +; RV32-NEXT: lbu a2, 15(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB17_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB17_8: +; RV32-NEXT: lbu a2, 1(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bgeu a3, a2, .LBB17_10 +; 
RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB17_10: +; RV32-NEXT: lbu a3, 0(sp) +; RV32-NEXT: lbu a0, 8(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a0, a3, .LBB17_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB17_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 2(sp) +; RV32-NEXT: lbu a4, 10(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a4, a3, .LBB17_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB17_14: +; RV32-NEXT: lbu a3, 3(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bgeu a4, a3, .LBB17_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB17_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setulev8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lbu a0, 1(sp) +; RV64-NEXT: lbu a3, 9(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a3, a0, .LBB17_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_2: +; RV64-NEXT: lbu a0, 0(sp) +; RV64-NEXT: lbu a4, 8(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a4, a0, .LBB17_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB17_4: +; RV64-NEXT: lbu a4, 2(sp) +; RV64-NEXT: lbu a5, 10(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB17_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_6: +; RV64-NEXT: lbu a3, 3(sp) +; RV64-NEXT: lbu a4, 11(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB17_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_8: +; RV64-NEXT: lbu a3, 4(sp) +; RV64-NEXT: lbu a4, 12(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB17_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_10: +; RV64-NEXT: lbu a3, 5(sp) +; RV64-NEXT: lbu a4, 13(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB17_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_12: +; RV64-NEXT: lbu a3, 6(sp) +; RV64-NEXT: lbu a4, 14(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB17_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB17_14: +; RV64-NEXT: lbu a3, 7(sp) +; RV64-NEXT: lbu a4, 15(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bgeu a4, a3, .LBB17_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB17_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ule <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +define i32 @setugev4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setugev4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lbu a0, 13(sp) +; RV32-NEXT: lbu a3, 9(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a3, a0, .LBB18_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, zero +; 
RV32-NEXT: .LBB18_2: +; RV32-NEXT: lbu a0, 12(sp) +; RV32-NEXT: lbu a4, 8(sp) +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: bgeu a4, a0, .LBB18_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a3, zero +; RV32-NEXT: .LBB18_4: +; RV32-NEXT: lbu a4, 14(sp) +; RV32-NEXT: lbu a5, 10(sp) +; RV32-NEXT: mv a0, zero +; RV32-NEXT: insb a0, a3, 0 +; RV32-NEXT: insb a0, a2, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a5, a4, .LBB18_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB18_6: +; RV32-NEXT: lbu a3, 15(sp) +; RV32-NEXT: lbu a4, 11(sp) +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bgeu a4, a3, .LBB18_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB18_8: +; RV32-NEXT: insb a0, a1, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setugev4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a3, a0, .LBB18_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a4, a0, .LBB18_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB18_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB18_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB18_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB18_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB18_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB18_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB18_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bgeu a4, a3, .LBB18_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB18_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp uge <4 x i8> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i8> + %res = bitcast <4 x i8> %sext to i32 + ret i32 %res +} + +define i64 @setugev8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setugev8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lbu a1, 13(sp) +; RV32-NEXT: lbu a2, 5(sp) +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: addi t0, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB19_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv t0, zero +; RV32-NEXT: .LBB19_2: +; RV32-NEXT: lbu a1, 12(sp) +; RV32-NEXT: lbu a2, 4(sp) +; 
RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB19_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB19_4: +; RV32-NEXT: lbu a1, 14(sp) +; RV32-NEXT: lbu a2, 6(sp) +; RV32-NEXT: addi t1, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB19_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv t1, zero +; RV32-NEXT: .LBB19_6: +; RV32-NEXT: lbu a1, 15(sp) +; RV32-NEXT: lbu a2, 7(sp) +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: bgeu a2, a1, .LBB19_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a7, zero +; RV32-NEXT: .LBB19_8: +; RV32-NEXT: lbu a2, 9(sp) +; RV32-NEXT: lbu a3, 1(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: insb a1, a5, 0 +; RV32-NEXT: addi a5, zero, -1 +; RV32-NEXT: bgeu a3, a2, .LBB19_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: mv a5, zero +; RV32-NEXT: .LBB19_10: +; RV32-NEXT: lbu a3, 8(sp) +; RV32-NEXT: lbu a0, 0(sp) +; RV32-NEXT: insb a1, t0, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a0, a3, .LBB19_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB19_12: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: lbu a3, 10(sp) +; RV32-NEXT: lbu a4, 2(sp) +; RV32-NEXT: insb a1, t1, 2 +; RV32-NEXT: insb a0, a2, 0 +; RV32-NEXT: insb a0, a5, 1 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bgeu a4, a3, .LBB19_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB19_14: +; RV32-NEXT: lbu a3, 11(sp) +; RV32-NEXT: lbu a4, 3(sp) +; RV32-NEXT: insb a1, a7, 3 +; RV32-NEXT: insb a0, a2, 2 +; RV32-NEXT: bgeu a4, a3, .LBB19_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: mv a6, zero +; RV32-NEXT: .LBB19_16: +; RV32-NEXT: insb a0, a6, 3 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setugev8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lbu a0, 9(sp) +; RV64-NEXT: lbu a3, 1(sp) +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a3, a0, .LBB19_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_2: +; RV64-NEXT: lbu a0, 8(sp) +; RV64-NEXT: lbu a4, 0(sp) +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a4, a0, .LBB19_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB19_4: +; RV64-NEXT: lbu a4, 10(sp) +; RV64-NEXT: lbu a5, 2(sp) +; RV64-NEXT: mv a0, zero +; RV64-NEXT: insb a0, a3, 0 +; RV64-NEXT: insb a0, a2, 1 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB19_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_6: +; RV64-NEXT: lbu a3, 11(sp) +; RV64-NEXT: lbu a4, 3(sp) +; RV64-NEXT: insb a0, a2, 2 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB19_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_8: +; RV64-NEXT: lbu a3, 12(sp) +; RV64-NEXT: lbu a4, 4(sp) +; RV64-NEXT: insb a0, a2, 3 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB19_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_10: +; RV64-NEXT: lbu a3, 13(sp) +; RV64-NEXT: lbu a4, 5(sp) +; RV64-NEXT: insb a0, a2, 4 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB19_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_12: +; RV64-NEXT: lbu a3, 14(sp) +; RV64-NEXT: lbu a4, 6(sp) +; RV64-NEXT: insb a0, a2, 5 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bgeu a4, a3, .LBB19_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB19_14: +; RV64-NEXT: lbu a3, 15(sp) +; RV64-NEXT: lbu a4, 7(sp) +; RV64-NEXT: insb a0, a2, 6 +; RV64-NEXT: bgeu a4, a3, .LBB19_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: 
mv a1, zero +; RV64-NEXT: .LBB19_16: +; RV64-NEXT: insb a0, a1, 7 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp uge <8 x i8> %tmp1, %tmp2 + %sext = sext <8 x i1> %cmp to <8 x i8> + %res = bitcast <8 x i8> %sext to i64 + ret i64 %res +} + +; cmpeq16 + +define i32 @seteqv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: seteqv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lhu a2, 14(sp) +; RV32-NEXT: lhu a3, 10(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: beq a3, a2, .LBB20_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB20_2: +; RV32-NEXT: lhu a2, 12(sp) +; RV32-NEXT: lhu a3, 8(sp) +; RV32-NEXT: beq a3, a2, .LBB20_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB20_4: +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: pkbb16 a0, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: seteqv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a2, 10(sp) +; RV64-NEXT: lhu a3, 2(sp) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: beq a3, a2, .LBB20_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB20_2: +; RV64-NEXT: lhu a3, 8(sp) +; RV64-NEXT: lhu a4, 0(sp) +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB20_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB20_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: lhu a4, 12(sp) +; RV64-NEXT: lhu a5, 4(sp) +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: beq a5, a4, .LBB20_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB20_6: +; RV64-NEXT: lhu a4, 14(sp) +; RV64-NEXT: lhu a5, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: beq a5, a4, .LBB20_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB20_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp eq <2 x i16> %tmp1, %tmp2 + %sext = sext <2 x i1> %cmp to <2 x i16> + %res = bitcast <2 x i16> %sext to i32 + ret i32 %res +} + +define i64 @seteqv4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: seteqv4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lhu a2, 14(sp) +; RV32-NEXT: lhu a3, 6(sp) +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: beq a3, a2, .LBB21_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB21_2: +; RV32-NEXT: lhu a3, 12(sp) +; RV32-NEXT: lhu a4, 4(sp) +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a4, a3, .LBB21_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB21_4: +; RV32-NEXT: lhu a4, 10(sp) +; RV32-NEXT: lhu a5, 2(sp) +; RV32-NEXT: pktb16 a3, zero, a2 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: beq a5, a4, .LBB21_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB21_6: +; RV32-NEXT: lhu a4, 8(sp) +; RV32-NEXT: lhu a5, 0(sp) +; RV32-NEXT: pkbb16 a1, a1, a3 +; RV32-NEXT: beq a5, a4, 
.LBB21_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB21_8: +; RV32-NEXT: pktb16 a0, zero, a0 +; RV32-NEXT: pkbb16 a0, a2, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: seteqv4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a2, 10(sp) +; RV64-NEXT: lhu a3, 2(sp) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: beq a3, a2, .LBB21_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB21_2: +; RV64-NEXT: lhu a3, 8(sp) +; RV64-NEXT: lhu a4, 0(sp) +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: beq a4, a3, .LBB21_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB21_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: lhu a4, 12(sp) +; RV64-NEXT: lhu a5, 4(sp) +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: beq a5, a4, .LBB21_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB21_6: +; RV64-NEXT: lhu a4, 14(sp) +; RV64-NEXT: lhu a5, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: beq a5, a4, .LBB21_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB21_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp eq <4 x i16> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i16> + %res = bitcast <4 x i16> %sext to i64 + ret i64 %res +} + +define i32 @setnev2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setnev2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lhu a2, 14(sp) +; RV32-NEXT: lhu a3, 10(sp) +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bne a3, a2, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: lhu a2, 12(sp) +; RV32-NEXT: lhu a3, 8(sp) +; RV32-NEXT: bne a3, a2, .LBB22_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB22_4: +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: pkbb16 a0, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setnev2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a2, 10(sp) +; RV64-NEXT: lhu a3, 2(sp) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bne a3, a2, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: lhu a3, 8(sp) +; RV64-NEXT: lhu a4, 0(sp) +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB22_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB22_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: lhu a4, 12(sp) +; RV64-NEXT: lhu a5, 4(sp) +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bne a5, a4, .LBB22_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB22_6: +; RV64-NEXT: lhu a4, 14(sp) +; RV64-NEXT: lhu a5, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: bne a5, a4, .LBB22_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero 
+; RV64-NEXT: .LBB22_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp ne <2 x i16> %tmp1, %tmp2 + %sext = sext <2 x i1> %cmp to <2 x i16> + %res = bitcast <2 x i16> %sext to i32 + ret i32 %res +} + +define i64 @setnev4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setnev4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lhu a2, 14(sp) +; RV32-NEXT: lhu a3, 6(sp) +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bne a3, a2, .LBB23_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB23_2: +; RV32-NEXT: lhu a3, 12(sp) +; RV32-NEXT: lhu a4, 4(sp) +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a4, a3, .LBB23_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB23_4: +; RV32-NEXT: lhu a4, 10(sp) +; RV32-NEXT: lhu a5, 2(sp) +; RV32-NEXT: pktb16 a3, zero, a2 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bne a5, a4, .LBB23_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB23_6: +; RV32-NEXT: lhu a4, 8(sp) +; RV32-NEXT: lhu a5, 0(sp) +; RV32-NEXT: pkbb16 a1, a1, a3 +; RV32-NEXT: bne a5, a4, .LBB23_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB23_8: +; RV32-NEXT: pktb16 a0, zero, a0 +; RV32-NEXT: pkbb16 a0, a2, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setnev4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lhu a2, 10(sp) +; RV64-NEXT: lhu a3, 2(sp) +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bne a3, a2, .LBB23_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB23_2: +; RV64-NEXT: lhu a3, 8(sp) +; RV64-NEXT: lhu a4, 0(sp) +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a4, a3, .LBB23_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB23_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: lhu a4, 12(sp) +; RV64-NEXT: lhu a5, 4(sp) +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bne a5, a4, .LBB23_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB23_6: +; RV64-NEXT: lhu a4, 14(sp) +; RV64-NEXT: lhu a5, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: bne a5, a4, .LBB23_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB23_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp ne <4 x i16> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i16> + %res = bitcast <4 x i16> %sext to i64 + ret i64 %res +} + +; scmplt16 + +define i32 @setltv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setltv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: lh a1, 10(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: lh a2, 12(sp) +; RV32-NEXT: slt a0, a1, a0 +; RV32-NEXT: lh a1, 8(sp) +; RV32-NEXT: neg a0, a0 +; 
RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: slt a1, a1, a2 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: pkbb16 a0, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setltv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: lh a1, 2(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a0, a0, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: lh a2, 8(sp) +; RV64-NEXT: slt a0, a1, a0 +; RV64-NEXT: lh a1, 0(sp) +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pktb16 a1, zero, a1 +; RV64-NEXT: pktb32 a1, zero, a1 +; RV64-NEXT: pkbb16 a0, a0, a1 +; RV64-NEXT: lh a2, 12(sp) +; RV64-NEXT: pktb32 a0, a1, a0 +; RV64-NEXT: lh a1, 4(sp) +; RV64-NEXT: pkbt32 a3, a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pktb16 a1, a3, a1 +; RV64-NEXT: lh a2, 14(sp) +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: lh a1, 6(sp) +; RV64-NEXT: pkbt32 a3, a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pkbb16 a1, a1, a3 +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp slt <2 x i16> %tmp1, %tmp2 + %sext = sext <2 x i1> %cmp to <2 x i16> + %res = bitcast <2 x i16> %sext to i32 + ret i32 %res +} + +define i64 @setltv4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setltv4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: lh a1, 6(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: lh a2, 12(sp) +; RV32-NEXT: slt a0, a1, a0 +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: slt a1, a1, a2 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: lh a2, 10(sp) +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: lh a3, 2(sp) +; RV32-NEXT: pkbb16 a1, a0, a1 +; RV32-NEXT: slli a0, a2, 16 +; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: slli a2, a3, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: lh a3, 8(sp) +; RV32-NEXT: slt a0, a2, a0 +; RV32-NEXT: lh a2, 0(sp) +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: slli a3, a3, 16 +; RV32-NEXT: srai a3, a3, 16 +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: slt a2, a2, a3 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: pktb16 a2, zero, a2 +; RV32-NEXT: pkbb16 a0, a0, a2 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setltv4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: lh a1, 2(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a0, a0, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai 
a1, a1, 48 +; RV64-NEXT: lh a2, 8(sp) +; RV64-NEXT: slt a0, a1, a0 +; RV64-NEXT: lh a1, 0(sp) +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pktb16 a1, zero, a1 +; RV64-NEXT: pktb32 a1, zero, a1 +; RV64-NEXT: pkbb16 a0, a0, a1 +; RV64-NEXT: lh a2, 12(sp) +; RV64-NEXT: pktb32 a0, a1, a0 +; RV64-NEXT: lh a1, 4(sp) +; RV64-NEXT: pkbt32 a3, a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pktb16 a1, a3, a1 +; RV64-NEXT: lh a2, 14(sp) +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: lh a1, 6(sp) +; RV64-NEXT: pkbt32 a3, a0, a0 +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a2, a2, 48 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: srai a1, a1, 48 +; RV64-NEXT: slt a1, a1, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: pkbb16 a1, a1, a3 +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp slt <4 x i16> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i16> + %res = bitcast <4 x i16> %sext to i64 + ret i64 %res +} + +define i32 @setgtv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setgtv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a2, a0, 16 +; RV32-NEXT: slli a0, a1, 16 +; RV32-NEXT: srai a3, a0, 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: blt a3, a2, .LBB26_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB26_2: +; RV32-NEXT: lh a2, 8(sp) +; RV32-NEXT: lh a3, 12(sp) +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: slli a3, a3, 16 +; RV32-NEXT: srai a3, a3, 16 +; RV32-NEXT: blt a3, a2, .LBB26_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB26_4: +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: pkbb16 a0, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgtv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a2, a0, 48 +; RV64-NEXT: slli a0, a1, 48 +; RV64-NEXT: srai a3, a0, 48 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: blt a3, a2, .LBB26_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB26_2: +; RV64-NEXT: lh a2, 0(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a4, .LBB26_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB26_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: lh a3, 4(sp) +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: lh a4, 12(sp) +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: slli a3, a3, 48 +; RV64-NEXT: srai a5, a3, 48 +; RV64-NEXT: slli a3, a4, 48 +; RV64-NEXT: srai a4, a3, 48 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: blt a4, a5, .LBB26_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB26_6: +; 
RV64-NEXT: lh a4, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: lh a3, 14(sp) +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: slli a2, a4, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: blt a3, a4, .LBB26_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB26_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp sgt <2 x i16> %tmp1, %tmp2 + %sext = sext <2 x i1> %cmp to <2 x i16> + %res = bitcast <2 x i16> %sext to i32 + ret i32 %res +} + +define i64 @setgtv4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setgtv4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a2, a0, 16 +; RV32-NEXT: slli a0, a1, 16 +; RV32-NEXT: srai a3, a0, 16 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: blt a3, a2, .LBB27_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB27_2: +; RV32-NEXT: lh a2, 4(sp) +; RV32-NEXT: lh a3, 12(sp) +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a4, a2, 16 +; RV32-NEXT: slli a2, a3, 16 +; RV32-NEXT: srai a3, a2, 16 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: blt a3, a4, .LBB27_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB27_4: +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: lh a5, 10(sp) +; RV32-NEXT: pktb16 a3, zero, a2 +; RV32-NEXT: slli a2, a4, 16 +; RV32-NEXT: srai a4, a2, 16 +; RV32-NEXT: slli a2, a5, 16 +; RV32-NEXT: srai a5, a2, 16 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: blt a5, a4, .LBB27_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB27_6: +; RV32-NEXT: lh a4, 0(sp) +; RV32-NEXT: lh a5, 8(sp) +; RV32-NEXT: slli a4, a4, 16 +; RV32-NEXT: srai a4, a4, 16 +; RV32-NEXT: slli a5, a5, 16 +; RV32-NEXT: srai a5, a5, 16 +; RV32-NEXT: pkbb16 a1, a1, a3 +; RV32-NEXT: blt a5, a4, .LBB27_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB27_8: +; RV32-NEXT: pktb16 a0, zero, a0 +; RV32-NEXT: pkbb16 a0, a2, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setgtv4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a2, a0, 48 +; RV64-NEXT: slli a0, a1, 48 +; RV64-NEXT: srai a3, a0, 48 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: blt a3, a2, .LBB27_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB27_2: +; RV64-NEXT: lh a2, 0(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: blt a3, a4, .LBB27_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB27_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: lh a3, 4(sp) +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: lh a4, 12(sp) +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: slli a3, a3, 48 +; RV64-NEXT: srai a5, a3, 48 +; RV64-NEXT: slli a3, a4, 48 +; RV64-NEXT: srai a4, a3, 48 +; RV64-NEXT: addi a3, zero, -1 +; 
RV64-NEXT: blt a4, a5, .LBB27_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB27_6: +; RV64-NEXT: lh a4, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: lh a3, 14(sp) +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: slli a2, a4, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: blt a3, a4, .LBB27_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB27_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp sgt <4 x i16> %tmp1, %tmp2 + %sext = sext <4 x i1> %cmp to <4 x i16> + %res = bitcast <4 x i16> %sext to i64 + ret i64 %res +} + +; scmple16 + +define i32 @setlev2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: setlev2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a2, a0, 16 +; RV32-NEXT: slli a0, a1, 16 +; RV32-NEXT: srai a3, a0, 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bge a3, a2, .LBB28_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB28_2: +; RV32-NEXT: lh a2, 8(sp) +; RV32-NEXT: lh a3, 12(sp) +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a2, a2, 16 +; RV32-NEXT: slli a3, a3, 16 +; RV32-NEXT: srai a3, a3, 16 +; RV32-NEXT: bge a3, a2, .LBB28_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB28_4: +; RV32-NEXT: pktb16 a1, zero, a1 +; RV32-NEXT: pkbb16 a0, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setlev2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a2, a0, 48 +; RV64-NEXT: slli a0, a1, 48 +; RV64-NEXT: srai a3, a0, 48 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bge a3, a2, .LBB28_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB28_2: +; RV64-NEXT: lh a2, 0(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB28_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB28_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: lh a3, 4(sp) +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: lh a4, 12(sp) +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: slli a3, a3, 48 +; RV64-NEXT: srai a5, a3, 48 +; RV64-NEXT: slli a3, a4, 48 +; RV64-NEXT: srai a4, a3, 48 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a5, .LBB28_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB28_6: +; RV64-NEXT: lh a4, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: lh a3, 14(sp) +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: slli a2, a4, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: bge a3, a4, .LBB28_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB28_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; RV64-NEXT: pkbb32 a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x 
i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp sle <2 x i16> %tmp1, %tmp2 + %sext = sext <2 x i1> %cmp to <2 x i16> + %res = bitcast <2 x i16> %sext to i32 + ret i32 %res +} + +define i64 @setlev4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: setlev4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a2, a0, 16 +; RV32-NEXT: slli a0, a1, 16 +; RV32-NEXT: srai a3, a0, 16 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bge a3, a2, .LBB29_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB29_2: +; RV32-NEXT: lh a2, 4(sp) +; RV32-NEXT: lh a3, 12(sp) +; RV32-NEXT: slli a2, a2, 16 +; RV32-NEXT: srai a4, a2, 16 +; RV32-NEXT: slli a2, a3, 16 +; RV32-NEXT: srai a3, a2, 16 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a3, a4, .LBB29_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB29_4: +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: lh a5, 10(sp) +; RV32-NEXT: pktb16 a3, zero, a2 +; RV32-NEXT: slli a2, a4, 16 +; RV32-NEXT: srai a4, a2, 16 +; RV32-NEXT: slli a2, a5, 16 +; RV32-NEXT: srai a5, a2, 16 +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: bge a5, a4, .LBB29_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: mv a2, zero +; RV32-NEXT: .LBB29_6: +; RV32-NEXT: lh a4, 0(sp) +; RV32-NEXT: lh a5, 8(sp) +; RV32-NEXT: slli a4, a4, 16 +; RV32-NEXT: srai a4, a4, 16 +; RV32-NEXT: slli a5, a5, 16 +; RV32-NEXT: srai a5, a5, 16 +; RV32-NEXT: pkbb16 a1, a1, a3 +; RV32-NEXT: bge a5, a4, .LBB29_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB29_8: +; RV32-NEXT: pktb16 a0, zero, a0 +; RV32-NEXT: pkbb16 a0, a2, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: setlev4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a2, a0, 48 +; RV64-NEXT: slli a0, a1, 48 +; RV64-NEXT: srai a3, a0, 48 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bge a3, a2, .LBB29_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB29_2: +; RV64-NEXT: lh a2, 0(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: slli a2, a2, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bge a3, a4, .LBB29_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB29_4: +; RV64-NEXT: pktb16 a2, zero, a2 +; RV64-NEXT: pktb32 a2, zero, a2 +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: lh a3, 4(sp) +; RV64-NEXT: pktb32 a1, a2, a1 +; RV64-NEXT: lh a4, 12(sp) +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: slli a3, a3, 48 +; RV64-NEXT: srai a5, a3, 48 +; RV64-NEXT: slli a3, a4, 48 +; RV64-NEXT: srai a4, a3, 48 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a4, a5, .LBB29_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB29_6: +; RV64-NEXT: lh a4, 6(sp) +; RV64-NEXT: pktb16 a2, a2, a3 +; RV64-NEXT: lh a3, 14(sp) +; RV64-NEXT: pkbb32 a1, a2, a1 +; RV64-NEXT: slli a2, a4, 48 +; RV64-NEXT: srai a4, a2, 48 +; RV64-NEXT: slli a2, a3, 48 +; RV64-NEXT: srai a3, a2, 48 +; RV64-NEXT: pkbt32 a2, a1, a1 +; RV64-NEXT: bge a3, a4, .LBB29_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB29_8: +; RV64-NEXT: pkbb16 a0, a0, a2 +; 
RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp sle <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}
+
+define i32 @setgev2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: setgev2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lh a0, 14(sp)
+; RV32-NEXT: lh a1, 10(sp)
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: srai a2, a0, 16
+; RV32-NEXT: slli a0, a1, 16
+; RV32-NEXT: srai a3, a0, 16
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bge a3, a2, .LBB30_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB30_2:
+; RV32-NEXT: lh a2, 12(sp)
+; RV32-NEXT: lh a3, 8(sp)
+; RV32-NEXT: slli a2, a2, 16
+; RV32-NEXT: srai a2, a2, 16
+; RV32-NEXT: slli a3, a3, 16
+; RV32-NEXT: srai a3, a3, 16
+; RV32-NEXT: bge a3, a2, .LBB30_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB30_4:
+; RV32-NEXT: pktb16 a1, zero, a1
+; RV32-NEXT: pkbb16 a0, a0, a1
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setgev2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lh a0, 10(sp)
+; RV64-NEXT: lh a1, 2(sp)
+; RV64-NEXT: slli a0, a0, 48
+; RV64-NEXT: srai a2, a0, 48
+; RV64-NEXT: slli a0, a1, 48
+; RV64-NEXT: srai a3, a0, 48
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bge a3, a2, .LBB30_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB30_2:
+; RV64-NEXT: lh a2, 8(sp)
+; RV64-NEXT: lh a3, 0(sp)
+; RV64-NEXT: slli a2, a2, 48
+; RV64-NEXT: srai a4, a2, 48
+; RV64-NEXT: slli a2, a3, 48
+; RV64-NEXT: srai a3, a2, 48
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bge a3, a4, .LBB30_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB30_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: lh a3, 12(sp)
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: lh a4, 4(sp)
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: slli a3, a3, 48
+; RV64-NEXT: srai a5, a3, 48
+; RV64-NEXT: slli a3, a4, 48
+; RV64-NEXT: srai a4, a3, 48
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bge a4, a5, .LBB30_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB30_6:
+; RV64-NEXT: lh a4, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: lh a3, 6(sp)
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: slli a2, a4, 48
+; RV64-NEXT: srai a4, a2, 48
+; RV64-NEXT: slli a2, a3, 48
+; RV64-NEXT: srai a3, a2, 48
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bge a3, a4, .LBB30_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB30_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <2 x i16>
+ %tmp2 = bitcast i32 %b to <2 x i16>
+ %cmp = icmp sge <2 x i16> %tmp1, %tmp2
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %res = bitcast <2 x i16> %sext to i32
+ ret i32 %res
+}
+
+define i64 @setgev4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: setgev4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: lh a0, 14(sp)
+; RV32-NEXT: lh a1, 6(sp)
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: srai a2, a0, 16
+; RV32-NEXT: slli a0, a1, 16
+; RV32-NEXT: srai a3, a0, 16
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bge a3, a2, .LBB31_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB31_2:
+; RV32-NEXT: lh a2, 12(sp)
+; RV32-NEXT: lh a3, 4(sp)
+; RV32-NEXT: slli a2, a2, 16
+; RV32-NEXT: srai a4, a2, 16
+; RV32-NEXT: slli a2, a3, 16
+; RV32-NEXT: srai a3, a2, 16
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bge a3, a4, .LBB31_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB31_4:
+; RV32-NEXT: lh a4, 10(sp)
+; RV32-NEXT: lh a5, 2(sp)
+; RV32-NEXT: pktb16 a3, zero, a2
+; RV32-NEXT: slli a2, a4, 16
+; RV32-NEXT: srai a4, a2, 16
+; RV32-NEXT: slli a2, a5, 16
+; RV32-NEXT: srai a5, a2, 16
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bge a5, a4, .LBB31_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB31_6:
+; RV32-NEXT: lh a4, 8(sp)
+; RV32-NEXT: lh a5, 0(sp)
+; RV32-NEXT: slli a4, a4, 16
+; RV32-NEXT: srai a4, a4, 16
+; RV32-NEXT: slli a5, a5, 16
+; RV32-NEXT: srai a5, a5, 16
+; RV32-NEXT: pkbb16 a1, a1, a3
+; RV32-NEXT: bge a5, a4, .LBB31_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB31_8:
+; RV32-NEXT: pktb16 a0, zero, a0
+; RV32-NEXT: pkbb16 a0, a2, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setgev4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lh a0, 10(sp)
+; RV64-NEXT: lh a1, 2(sp)
+; RV64-NEXT: slli a0, a0, 48
+; RV64-NEXT: srai a2, a0, 48
+; RV64-NEXT: slli a0, a1, 48
+; RV64-NEXT: srai a3, a0, 48
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bge a3, a2, .LBB31_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB31_2:
+; RV64-NEXT: lh a2, 8(sp)
+; RV64-NEXT: lh a3, 0(sp)
+; RV64-NEXT: slli a2, a2, 48
+; RV64-NEXT: srai a4, a2, 48
+; RV64-NEXT: slli a2, a3, 48
+; RV64-NEXT: srai a3, a2, 48
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bge a3, a4, .LBB31_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB31_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: lh a3, 12(sp)
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: lh a4, 4(sp)
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: slli a3, a3, 48
+; RV64-NEXT: srai a5, a3, 48
+; RV64-NEXT: slli a3, a4, 48
+; RV64-NEXT: srai a4, a3, 48
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bge a4, a5, .LBB31_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB31_6:
+; RV64-NEXT: lh a4, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: lh a3, 6(sp)
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: slli a2, a4, 48
+; RV64-NEXT: srai a4, a2, 48
+; RV64-NEXT: slli a2, a3, 48
+; RV64-NEXT: srai a3, a2, 48
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bge a3, a4, .LBB31_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB31_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp sge <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}
+
+; ucmplt16
+
+define i32 @setultv2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: setultv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lhu a2, 14(sp)
+; RV32-NEXT: lhu a3, 10(sp)
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bltu a3, a2, .LBB32_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB32_2:
+; RV32-NEXT: lhu a2, 12(sp)
+; RV32-NEXT: lhu a3, 8(sp)
+; RV32-NEXT: bltu a3, a2, .LBB32_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB32_4:
+; RV32-NEXT: pktb16 a1, zero, a1
+; RV32-NEXT: pkbb16 a0, a0, a1
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setultv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a2, 10(sp)
+; RV64-NEXT: lhu a3, 2(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bltu a3, a2, .LBB32_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB32_2:
+; RV64-NEXT: lhu a3, 8(sp)
+; RV64-NEXT: lhu a4, 0(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bltu a4, a3, .LBB32_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB32_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 12(sp)
+; RV64-NEXT: lhu a5, 4(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bltu a5, a4, .LBB32_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB32_6:
+; RV64-NEXT: lhu a4, 14(sp)
+; RV64-NEXT: lhu a5, 6(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bltu a5, a4, .LBB32_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB32_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <2 x i16>
+ %tmp2 = bitcast i32 %b to <2 x i16>
+ %cmp = icmp ult <2 x i16> %tmp1, %tmp2
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %res = bitcast <2 x i16> %sext to i32
+ ret i32 %res
+}
+
+define i64 @setultv4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: setultv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: lhu a2, 14(sp)
+; RV32-NEXT: lhu a3, 6(sp)
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bltu a3, a2, .LBB33_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB33_2:
+; RV32-NEXT: lhu a3, 12(sp)
+; RV32-NEXT: lhu a4, 4(sp)
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bltu a4, a3, .LBB33_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB33_4:
+; RV32-NEXT: lhu a4, 10(sp)
+; RV32-NEXT: lhu a5, 2(sp)
+; RV32-NEXT: pktb16 a3, zero, a2
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bltu a5, a4, .LBB33_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB33_6:
+; RV32-NEXT: lhu a4, 8(sp)
+; RV32-NEXT: lhu a5, 0(sp)
+; RV32-NEXT: pkbb16 a1, a1, a3
+; RV32-NEXT: bltu a5, a4, .LBB33_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB33_8:
+; RV32-NEXT: pktb16 a0, zero, a0
+; RV32-NEXT: pkbb16 a0, a2, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setultv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a2, 10(sp)
+; RV64-NEXT: lhu a3, 2(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bltu a3, a2, .LBB33_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB33_2:
+; RV64-NEXT: lhu a3, 8(sp)
+; RV64-NEXT: lhu a4, 0(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bltu a4, a3, .LBB33_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB33_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 12(sp)
+; RV64-NEXT: lhu a5, 4(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bltu a5, a4, .LBB33_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB33_6:
+; RV64-NEXT: lhu a4, 14(sp)
+; RV64-NEXT: lhu a5, 6(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bltu a5, a4, .LBB33_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB33_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp ult <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}
+
+define i32 @setugtv2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: setugtv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: lhu a2, 10(sp)
+; RV32-NEXT: lhu a3, 14(sp)
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bltu a3, a2, .LBB34_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB34_2:
+; RV32-NEXT: lhu a2, 8(sp)
+; RV32-NEXT: lhu a3, 12(sp)
+; RV32-NEXT: bltu a3, a2, .LBB34_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB34_4:
+; RV32-NEXT: pktb16 a1, zero, a1
+; RV32-NEXT: pkbb16 a0, a0, a1
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setugtv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: lhu a2, 2(sp)
+; RV64-NEXT: lhu a3, 10(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bltu a3, a2, .LBB34_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB34_2:
+; RV64-NEXT: lhu a3, 0(sp)
+; RV64-NEXT: lhu a4, 8(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bltu a4, a3, .LBB34_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB34_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 4(sp)
+; RV64-NEXT: lhu a5, 12(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bltu a5, a4, .LBB34_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB34_6:
+; RV64-NEXT: lhu a4, 6(sp)
+; RV64-NEXT: lhu a5, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bltu a5, a4, .LBB34_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB34_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <2 x i16>
+ %tmp2 = bitcast i32 %b to <2 x i16>
+ %cmp = icmp ugt <2 x i16> %tmp1, %tmp2
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %res = bitcast <2 x i16> %sext to i32
+ ret i32 %res
+}
+
+define i64 @setugtv4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: setugtv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: lhu a2, 6(sp)
+; RV32-NEXT: lhu a3, 14(sp)
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bltu a3, a2, .LBB35_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: lhu a3, 4(sp)
+; RV32-NEXT: lhu a4, 12(sp)
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bltu a4, a3, .LBB35_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB35_4:
+; RV32-NEXT: lhu a4, 2(sp)
+; RV32-NEXT: lhu a5, 10(sp)
+; RV32-NEXT: pktb16 a3, zero, a2
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bltu a5, a4, .LBB35_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB35_6:
+; RV32-NEXT: lhu a4, 0(sp)
+; RV32-NEXT: lhu a5, 8(sp)
+; RV32-NEXT: pkbb16 a1, a1, a3
+; RV32-NEXT: bltu a5, a4, .LBB35_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB35_8:
+; RV32-NEXT: pktb16 a0, zero, a0
+; RV32-NEXT: pkbb16 a0, a2, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setugtv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: lhu a2, 2(sp)
+; RV64-NEXT: lhu a3, 10(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bltu a3, a2, .LBB35_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: lhu a3, 0(sp)
+; RV64-NEXT: lhu a4, 8(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bltu a4, a3, .LBB35_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB35_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 4(sp)
+; RV64-NEXT: lhu a5, 12(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bltu a5, a4, .LBB35_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB35_6:
+; RV64-NEXT: lhu a4, 6(sp)
+; RV64-NEXT: lhu a5, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bltu a5, a4, .LBB35_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB35_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp ugt <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}
+
+; ucmple16
+
+define i32 @setulev2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: setulev2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: lhu a2, 10(sp)
+; RV32-NEXT: lhu a3, 14(sp)
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bgeu a3, a2, .LBB36_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB36_2:
+; RV32-NEXT: lhu a2, 8(sp)
+; RV32-NEXT: lhu a3, 12(sp)
+; RV32-NEXT: bgeu a3, a2, .LBB36_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB36_4:
+; RV32-NEXT: pktb16 a1, zero, a1
+; RV32-NEXT: pkbb16 a0, a0, a1
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setulev2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: lhu a2, 2(sp)
+; RV64-NEXT: lhu a3, 10(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bgeu a3, a2, .LBB36_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB36_2:
+; RV64-NEXT: lhu a3, 0(sp)
+; RV64-NEXT: lhu a4, 8(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bgeu a4, a3, .LBB36_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB36_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 4(sp)
+; RV64-NEXT: lhu a5, 12(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bgeu a5, a4, .LBB36_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB36_6:
+; RV64-NEXT: lhu a4, 6(sp)
+; RV64-NEXT: lhu a5, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bgeu a5, a4, .LBB36_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB36_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <2 x i16>
+ %tmp2 = bitcast i32 %b to <2 x i16>
+ %cmp = icmp ule <2 x i16> %tmp1, %tmp2
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %res = bitcast <2 x i16> %sext to i32
+ ret i32 %res
+}
+
+define i64 @setulev4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: setulev4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: lhu a2, 6(sp)
+; RV32-NEXT: lhu a3, 14(sp)
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bgeu a3, a2, .LBB37_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB37_2:
+; RV32-NEXT: lhu a3, 4(sp)
+; RV32-NEXT: lhu a4, 12(sp)
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bgeu a4, a3, .LBB37_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB37_4:
+; RV32-NEXT: lhu a4, 2(sp)
+; RV32-NEXT: lhu a5, 10(sp)
+; RV32-NEXT: pktb16 a3, zero, a2
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bgeu a5, a4, .LBB37_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB37_6:
+; RV32-NEXT: lhu a4, 0(sp)
+; RV32-NEXT: lhu a5, 8(sp)
+; RV32-NEXT: pkbb16 a1, a1, a3
+; RV32-NEXT: bgeu a5, a4, .LBB37_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB37_8:
+; RV32-NEXT: pktb16 a0, zero, a0
+; RV32-NEXT: pkbb16 a0, a2, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setulev4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: lhu a2, 2(sp)
+; RV64-NEXT: lhu a3, 10(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bgeu a3, a2, .LBB37_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB37_2:
+; RV64-NEXT: lhu a3, 0(sp)
+; RV64-NEXT: lhu a4, 8(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bgeu a4, a3, .LBB37_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB37_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 4(sp)
+; RV64-NEXT: lhu a5, 12(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bgeu a5, a4, .LBB37_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB37_6:
+; RV64-NEXT: lhu a4, 6(sp)
+; RV64-NEXT: lhu a5, 14(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bgeu a5, a4, .LBB37_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB37_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp ule <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}
+
+define i32 @setugev2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: setugev2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lhu a2, 14(sp)
+; RV32-NEXT: lhu a3, 10(sp)
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: bgeu a3, a2, .LBB38_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB38_2:
+; RV32-NEXT: lhu a2, 12(sp)
+; RV32-NEXT: lhu a3, 8(sp)
+; RV32-NEXT: bgeu a3, a2, .LBB38_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB38_4:
+; RV32-NEXT: pktb16 a1, zero, a1
+; RV32-NEXT: pkbb16 a0, a0, a1
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setugev2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a2, 10(sp)
+; RV64-NEXT: lhu a3, 2(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bgeu a3, a2, .LBB38_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB38_2:
+; RV64-NEXT: lhu a3, 8(sp)
+; RV64-NEXT: lhu a4, 0(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bgeu a4, a3, .LBB38_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB38_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 12(sp)
+; RV64-NEXT: lhu a5, 4(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bgeu a5, a4, .LBB38_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB38_6:
+; RV64-NEXT: lhu a4, 14(sp)
+; RV64-NEXT: lhu a5, 6(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bgeu a5, a4, .LBB38_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB38_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <2 x i16>
+ %tmp2 = bitcast i32 %b to <2 x i16>
+ %cmp = icmp uge <2 x i16> %tmp1, %tmp2
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %res = bitcast <2 x i16> %sext to i32
+ ret i32 %res
+}
+
+define i64 @setugev4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: setugev4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: lhu a2, 14(sp)
+; RV32-NEXT: lhu a3, 6(sp)
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: addi a1, zero, -1
+; RV32-NEXT: bgeu a3, a2, .LBB39_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: .LBB39_2:
+; RV32-NEXT: lhu a3, 12(sp)
+; RV32-NEXT: lhu a4, 4(sp)
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bgeu a4, a3, .LBB39_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB39_4:
+; RV32-NEXT: lhu a4, 10(sp)
+; RV32-NEXT: lhu a5, 2(sp)
+; RV32-NEXT: pktb16 a3, zero, a2
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: bgeu a5, a4, .LBB39_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: mv a2, zero
+; RV32-NEXT: .LBB39_6:
+; RV32-NEXT: lhu a4, 8(sp)
+; RV32-NEXT: lhu a5, 0(sp)
+; RV32-NEXT: pkbb16 a1, a1, a3
+; RV32-NEXT: bgeu a5, a4, .LBB39_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: mv a0, zero
+; RV32-NEXT: .LBB39_8:
+; RV32-NEXT: pktb16 a0, zero, a0
+; RV32-NEXT: pkbb16 a0, a2, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: setugev4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a2, 10(sp)
+; RV64-NEXT: lhu a3, 2(sp)
+; RV64-NEXT: addi a0, zero, -1
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: bgeu a3, a2, .LBB39_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: .LBB39_2:
+; RV64-NEXT: lhu a3, 8(sp)
+; RV64-NEXT: lhu a4, 0(sp)
+; RV64-NEXT: addi a2, zero, -1
+; RV64-NEXT: bgeu a4, a3, .LBB39_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: .LBB39_4:
+; RV64-NEXT: pktb16 a2, zero, a2
+; RV64-NEXT: pktb32 a2, zero, a2
+; RV64-NEXT: lhu a4, 12(sp)
+; RV64-NEXT: lhu a5, 4(sp)
+; RV64-NEXT: pkbb16 a1, a1, a2
+; RV64-NEXT: pktb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: addi a3, zero, -1
+; RV64-NEXT: bgeu a5, a4, .LBB39_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: mv a3, zero
+; RV64-NEXT: .LBB39_6:
+; RV64-NEXT: lhu a4, 14(sp)
+; RV64-NEXT: lhu a5, 6(sp)
+; RV64-NEXT: pktb16 a2, a2, a3
+; RV64-NEXT: pkbb32 a1, a2, a1
+; RV64-NEXT: pkbt32 a2, a1, a1
+; RV64-NEXT: bgeu a5, a4, .LBB39_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: mv a0, zero
+; RV64-NEXT: .LBB39_8:
+; RV64-NEXT: pkbb16 a0, a0, a2
+; RV64-NEXT: pkbb32 a0, a0, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %cmp = icmp uge <4 x i16> %tmp1, %tmp2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ %res = bitcast <4 x i16> %sext to i64
+ ret i64 %res
+}