diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-maxmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvp/vector-maxmin-rv64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-maxmin-rv64.ll @@ -0,0 +1,591 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +; smax32 + +define i64 @smaxv2i32_1(i64 %a, i64 %b) nounwind { +; RV64-LABEL: smaxv2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a2, 12(sp) +; RV64-NEXT: lw a3, 4(sp) +; RV64-NEXT: lw a4, 8(sp) +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: slt a2, a3, a2 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: slt a3, a5, a4 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: pktb32 a3, zero, a3 +; RV64-NEXT: pkbb32 a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp slt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @smaxv2i32_2(i64 %a, i64 %b) nounwind { +; RV64-LABEL: smaxv2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a5, a4, .LBB1_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bge a2, a5, .LBB1_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB1_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sle <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @smaxv2i32_3(i64 %a, i64 %b) nounwind { +; RV64-LABEL: smaxv2i32_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: blt a5, a4, .LBB2_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB2_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: blt a2, a5, .LBB2_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB2_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sgt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @smaxv2i32_4(i64 %a, i64 %b) nounwind { +; 
RV64-LABEL: smaxv2i32_4: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a5, a4, .LBB3_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB3_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bge a2, a5, .LBB3_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB3_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sge <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +; smin32 + +define i64 @sminv2i32_1(i64 %a, i64 %b) nounwind { +; RV64-LABEL: sminv2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a2, 12(sp) +; RV64-NEXT: lw a3, 4(sp) +; RV64-NEXT: lw a4, 8(sp) +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: slt a2, a3, a2 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: slt a3, a5, a4 +; RV64-NEXT: neg a3, a3 +; RV64-NEXT: pktb32 a3, zero, a3 +; RV64-NEXT: pkbb32 a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp slt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @sminv2i32_2(i64 %a, i64 %b) nounwind { +; RV64-LABEL: sminv2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a5, a4, .LBB5_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB5_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bge a2, a5, .LBB5_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB5_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sle <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @sminv2i32_3(i64 %a, i64 %b) nounwind { +; RV64-LABEL: sminv2i32_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: blt a5, a4, .LBB6_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB6_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: blt a2, a5, .LBB6_4 +; 
RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB6_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sgt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @sminv2i32_4(i64 %a, i64 %b) nounwind { +; RV64-LABEL: sminv2i32_4: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bge a5, a4, .LBB7_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB7_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bge a2, a5, .LBB7_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB7_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp sge <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +; umax32 + +define i64 @umaxv2i32_1(i64 %a, i64 %b) nounwind { +; RV64-LABEL: umaxv2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB8_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB8_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bltu a2, a5, .LBB8_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB8_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ult <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @umaxv2i32_2(i64 %a, i64 %b) nounwind { +; RV64-LABEL: umaxv2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB9_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB9_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bgeu a2, a5, .LBB9_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB9_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = 
bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ule <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @umaxv2i32_3(i64 %a, i64 %b) nounwind { +; RV64-LABEL: umaxv2i32_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB10_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB10_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bltu a2, a5, .LBB10_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB10_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ugt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @umaxv2i32_4(i64 %a, i64 %b) nounwind { +; RV64-LABEL: umaxv2i32_4: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB11_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB11_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bgeu a2, a5, .LBB11_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB11_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp uge <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +; umin32 + +define i64 @uminv2i32_1(i64 %a, i64 %b) nounwind { +; RV64-LABEL: uminv2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB12_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB12_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bltu a2, a5, .LBB12_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB12_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ult <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @uminv2i32_2(i64 %a, i64 %b) nounwind { +; 
RV64-LABEL: uminv2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB13_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB13_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bgeu a2, a5, .LBB13_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB13_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ule <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp1, <2 x i32> %tmp2 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @uminv2i32_3(i64 %a, i64 %b) nounwind { +; RV64-LABEL: uminv2i32_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: lw a5, 12(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bltu a5, a4, .LBB14_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB14_2: +; RV64-NEXT: lw a5, 0(sp) +; RV64-NEXT: lw a2, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bltu a2, a5, .LBB14_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB14_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp ugt <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} + +define i64 @uminv2i32_4(i64 %a, i64 %b) nounwind { +; RV64-LABEL: uminv2i32_4: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a4, 12(sp) +; RV64-NEXT: lw a5, 4(sp) +; RV64-NEXT: addi a6, zero, -1 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: bgeu a5, a4, .LBB15_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a3, zero +; RV64-NEXT: .LBB15_2: +; RV64-NEXT: lw a5, 8(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: bgeu a2, a5, .LBB15_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: mv a4, zero +; RV64-NEXT: .LBB15_4: +; RV64-NEXT: pktb32 a2, zero, a4 +; RV64-NEXT: pkbb32 a2, a3, a2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: xor a2, a2, a6 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %cmp = icmp uge <2 x i32> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i32> %tmp2, <2 x i32> %tmp1 + %res = bitcast <2 x i32> %select to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-maxmin.ll b/llvm/test/CodeGen/RISCV/rvp/vector-maxmin.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-maxmin.ll @@ -0,0 +1,1973 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s 
\ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +; smax8 + +define i32 @smaxv4i8_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv4i8_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i8_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp slt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @smaxv4i8_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv4i8_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i8_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sle <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @smaxv4i8_3(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv4i8_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i8_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sgt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @smaxv4i8_4(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv4i8_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i8_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sge <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i64 @smaxv8i8_1(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv8i8_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a4, a0, a2 +; RV32-NEXT: scmplt8 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, 
a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv8i8_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp slt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @smaxv8i8_2(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv8i8_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a4, a0, a2 +; RV32-NEXT: scmple8 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv8i8_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sle <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @smaxv8i8_3(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv8i8_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a4, a2, a0 +; RV32-NEXT: scmplt8 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv8i8_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sgt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @smaxv8i8_4(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv8i8_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a4, a2, a0 +; RV32-NEXT: scmple8 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv8i8_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sge <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +; smin8 + +define i32 @sminv4i8_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv4i8_1: +; 
RV32: # %bb.0: +; RV32-NEXT: scmplt8 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv4i8_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp slt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @sminv4i8_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv4i8_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv4i8_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sle <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @sminv4i8_3(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv4i8_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a2, a1, a0 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv4i8_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sgt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @sminv4i8_4(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv4i8_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a2, a1, a0 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv4i8_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp sge <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i64 @sminv8i8_1(i64 %a, i64 %b) nounwind { +; RV32-LABEL: sminv8i8_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a4, a0, a2 +; RV32-NEXT: scmplt8 a5, a1, a3 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv8i8_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, 
a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp slt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @sminv8i8_2(i64 %a, i64 %b) nounwind { +; RV32-LABEL: sminv8i8_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a4, a0, a2 +; RV32-NEXT: scmple8 a5, a1, a3 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv8i8_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sle <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @sminv8i8_3(i64 %a, i64 %b) nounwind { +; RV32-LABEL: sminv8i8_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt8 a4, a2, a0 +; RV32-NEXT: scmplt8 a5, a3, a1 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv8i8_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sgt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @sminv8i8_4(i64 %a, i64 %b) nounwind { +; RV32-LABEL: sminv8i8_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple8 a4, a2, a0 +; RV32-NEXT: scmple8 a5, a3, a1 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv8i8_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp sge <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +; umax8 + +define i32 @umaxv4i8_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: umaxv4i8_1: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv4i8_1: +; RV64: # %bb.0: +; 
RV64-NEXT: ucmplt8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ult <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @umaxv4i8_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: umaxv4i8_2: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv4i8_2: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ule <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @umaxv4i8_3(i32 %a, i32 %b) nounwind { +; RV32-LABEL: umaxv4i8_3: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv4i8_3: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ugt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @umaxv4i8_4(i32 %a, i32 %b) nounwind { +; RV32-LABEL: umaxv4i8_4: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv4i8_4: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp uge <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i64 @umaxv8i8_1(i64 %a, i64 %b) nounwind { +; RV32-LABEL: umaxv8i8_1: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a4, a0, a2 +; RV32-NEXT: ucmplt8 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv8i8_1: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ult 
<8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @umaxv8i8_2(i64 %a, i64 %b) nounwind { +; RV32-LABEL: umaxv8i8_2: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a4, a0, a2 +; RV32-NEXT: ucmple8 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv8i8_2: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ule <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @umaxv8i8_3(i64 %a, i64 %b) nounwind { +; RV32-LABEL: umaxv8i8_3: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a4, a2, a0 +; RV32-NEXT: ucmplt8 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv8i8_3: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ugt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @umaxv8i8_4(i64 %a, i64 %b) nounwind { +; RV32-LABEL: umaxv8i8_4: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a4, a2, a0 +; RV32-NEXT: ucmple8 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: umaxv8i8_4: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp uge <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +; umin8 + +define i32 @uminv4i8_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: uminv4i8_1: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv4i8_1: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x 
i8> + %cmp = icmp ult <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @uminv4i8_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: uminv4i8_2: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv4i8_2: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ule <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp1, <4 x i8> %tmp2 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @uminv4i8_3(i32 %a, i32 %b) nounwind { +; RV32-LABEL: uminv4i8_3: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a2, a1, a0 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv4i8_3: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp ugt <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i32 @uminv4i8_4(i32 %a, i32 %b) nounwind { +; RV32-LABEL: uminv4i8_4: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a2, a1, a0 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv4i8_4: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %cmp = icmp uge <4 x i8> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i8> %tmp2, <4 x i8> %tmp1 + %res = bitcast <4 x i8> %select to i32 + ret i32 %res +} + +define i64 @uminv8i8_1(i64 %a, i64 %b) nounwind { +; RV32-LABEL: uminv8i8_1: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a4, a0, a2 +; RV32-NEXT: ucmplt8 a5, a1, a3 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv8i8_1: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ult <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @uminv8i8_2(i64 %a, i64 %b) nounwind { +; RV32-LABEL: uminv8i8_2: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 
a4, a0, a2 +; RV32-NEXT: ucmple8 a5, a1, a3 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv8i8_2: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ule <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp1, <8 x i8> %tmp2 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @uminv8i8_3(i64 %a, i64 %b) nounwind { +; RV32-LABEL: uminv8i8_3: +; RV32: # %bb.0: +; RV32-NEXT: ucmplt8 a4, a2, a0 +; RV32-NEXT: ucmplt8 a5, a3, a1 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv8i8_3: +; RV64: # %bb.0: +; RV64-NEXT: ucmplt8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp ugt <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +define i64 @uminv8i8_4(i64 %a, i64 %b) nounwind { +; RV32-LABEL: uminv8i8_4: +; RV32: # %bb.0: +; RV32-NEXT: ucmple8 a4, a2, a0 +; RV32-NEXT: ucmple8 a5, a3, a1 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: uminv8i8_4: +; RV64: # %bb.0: +; RV64-NEXT: ucmple8 a2, a1, a0 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %cmp = icmp uge <8 x i8> %tmp1, %tmp2 + %select = select <8 x i1> %cmp, <8 x i8> %tmp2, <8 x i8> %tmp1 + %res = bitcast <8 x i8> %select to i64 + ret i64 %res +} + +; smax16 + +define i32 @smaxv2i16_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt16 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt16 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp slt <2 x i16> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1 + %res = bitcast <2 x i16> %select to i32 + ret i32 %res +} + +define i32 @smaxv2i16_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv2i16_2: +; 
RV32: # %bb.0: +; RV32-NEXT: scmple16 a2, a0, a1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv2i16_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple16 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp sle <2 x i16> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1 + %res = bitcast <2 x i16> %select to i32 + ret i32 %res +} + +define i32 @smaxv2i16_3(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv2i16_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt16 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv2i16_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt16 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp sgt <2 x i16> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2 + %res = bitcast <2 x i16> %select to i32 + ret i32 %res +} + +define i32 @smaxv2i16_4(i32 %a, i32 %b) nounwind { +; RV32-LABEL: smaxv2i16_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple16 a2, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv2i16_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple16 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp sge <2 x i16> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2 + %res = bitcast <2 x i16> %select to i32 + ret i32 %res +} + +define i64 @smaxv4i16_1(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt16 a4, a0, a2 +; RV32-NEXT: scmplt16 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt16 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp slt <4 x i16> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1 + %res = bitcast <4 x i16> %select to i64 + ret i64 %res +} + +define i64 @smaxv4i16_2(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple16 a4, a0, a2 +; RV32-NEXT: scmple16 a5, a1, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: or a1, a3, a1 +; 
RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: scmple16 a2, a0, a1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp sle <4 x i16> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1 + %res = bitcast <4 x i16> %select to i64 + ret i64 %res +} + +define i64 @smaxv4i16_3(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv4i16_3: +; RV32: # %bb.0: +; RV32-NEXT: scmplt16 a4, a2, a0 +; RV32-NEXT: scmplt16 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i16_3: +; RV64: # %bb.0: +; RV64-NEXT: scmplt16 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp sgt <4 x i16> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2 + %res = bitcast <4 x i16> %select to i64 + ret i64 %res +} + +define i64 @smaxv4i16_4(i64 %a, i64 %b) nounwind { +; RV32-LABEL: smaxv4i16_4: +; RV32: # %bb.0: +; RV32-NEXT: scmple16 a4, a2, a0 +; RV32-NEXT: scmple16 a5, a3, a1 +; RV32-NEXT: and a1, a1, a5 +; RV32-NEXT: addi a6, zero, -1 +; RV32-NEXT: xor a5, a5, a6 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: and a0, a0, a4 +; RV32-NEXT: xor a3, a4, a6 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: smaxv4i16_4: +; RV64: # %bb.0: +; RV64-NEXT: scmple16 a2, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %cmp = icmp sge <4 x i16> %tmp1, %tmp2 + %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2 + %res = bitcast <4 x i16> %select to i64 + ret i64 %res +} + +; smin16 + +define i32 @sminv2i16_1(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: scmplt16 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: sminv2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: scmplt16 a2, a0, a1 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: addi a3, zero, -1 +; RV64-NEXT: xor a2, a2, a3 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %cmp = icmp slt <2 x i16> %tmp1, %tmp2 + %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2 + %res = bitcast <2 x i16> %select to i32 + ret i32 %res +} + +define i32 @sminv2i16_2(i32 %a, i32 %b) nounwind { +; RV32-LABEL: sminv2i16_2: +; RV32: # %bb.0: +; RV32-NEXT: scmple16 a2, a0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: xor a2, a2, a3 +; RV32-NEXT: and a1, a1, 
a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv2i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmple16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp sle <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @sminv2i16_3(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: sminv2i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmplt16 a2, a1, a0
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv2i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmplt16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp sgt <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @sminv2i16_4(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: sminv2i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmple16 a2, a1, a0
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv2i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmple16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp sge <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i64 @sminv4i16_1(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: sminv4i16_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmplt16 a4, a0, a2
+; RV32-NEXT:    scmplt16 a5, a1, a3
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv4i16_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmplt16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp slt <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @sminv4i16_2(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: sminv4i16_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmple16 a4, a0, a2
+; RV32-NEXT:    scmple16 a5, a1, a3
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv4i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmple16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp sle <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @sminv4i16_3(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: sminv4i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmplt16 a4, a2, a0
+; RV32-NEXT:    scmplt16 a5, a3, a1
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv4i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmplt16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp sgt <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @sminv4i16_4(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: sminv4i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    scmple16 a4, a2, a0
+; RV32-NEXT:    scmple16 a5, a3, a1
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sminv4i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    scmple16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp sge <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+; umax16
+
+define i32 @umaxv2i16_1(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: umaxv2i16_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a2, a0, a1
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv2i16_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a0, a1
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ult <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @umaxv2i16_2(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: umaxv2i16_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a2, a0, a1
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv2i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a0, a1
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ule <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @umaxv2i16_3(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: umaxv2i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a2, a1, a0
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv2i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a1, a0
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ugt <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @umaxv2i16_4(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: umaxv2i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a2, a1, a0
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv2i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a1, a0
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp uge <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i64 @umaxv4i16_1(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: umaxv4i16_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a4, a0, a2
+; RV32-NEXT:    ucmplt16 a5, a1, a3
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv4i16_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a0, a1
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ult <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @umaxv4i16_2(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: umaxv4i16_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a4, a0, a2
+; RV32-NEXT:    ucmple16 a5, a1, a3
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv4i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a0, a1
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ule <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @umaxv4i16_3(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: umaxv4i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a4, a2, a0
+; RV32-NEXT:    ucmplt16 a5, a3, a1
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv4i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a1, a0
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ugt <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @umaxv4i16_4(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: umaxv4i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a4, a2, a0
+; RV32-NEXT:    ucmple16 a5, a3, a1
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: umaxv4i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a1, a0
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp uge <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+; umin16
+
+define i32 @uminv2i16_1(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: uminv2i16_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a2, a0, a1
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv2i16_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ult <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @uminv2i16_2(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: uminv2i16_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a2, a0, a1
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv2i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ule <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp1, <2 x i16> %tmp2
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @uminv2i16_3(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: uminv2i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a2, a1, a0
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv2i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp ugt <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i32 @uminv2i16_4(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: uminv2i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a2, a1, a0
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    addi a3, zero, -1
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv2i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %cmp = icmp uge <2 x i16> %tmp1, %tmp2
+  %select = select <2 x i1> %cmp, <2 x i16> %tmp2, <2 x i16> %tmp1
+  %res = bitcast <2 x i16> %select to i32
+  ret i32 %res
+}
+
+define i64 @uminv4i16_1(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: uminv4i16_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a4, a0, a2
+; RV32-NEXT:    ucmplt16 a5, a1, a3
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv4i16_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ult <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @uminv4i16_2(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: uminv4i16_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a4, a0, a2
+; RV32-NEXT:    ucmple16 a5, a1, a3
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a2, a2, a3
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv4i16_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a0, a1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ule <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp1, <4 x i16> %tmp2
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @uminv4i16_3(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: uminv4i16_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmplt16 a4, a2, a0
+; RV32-NEXT:    ucmplt16 a5, a3, a1
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv4i16_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmplt16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp ugt <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}
+
+define i64 @uminv4i16_4(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: uminv4i16_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ucmple16 a4, a2, a0
+; RV32-NEXT:    ucmple16 a5, a3, a1
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    addi a6, zero, -1
+; RV32-NEXT:    xor a5, a5, a6
+; RV32-NEXT:    and a1, a1, a5
+; RV32-NEXT:    or a1, a3, a1
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    xor a3, a4, a6
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uminv4i16_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ucmple16 a2, a1, a0
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    addi a3, zero, -1
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %cmp = icmp uge <4 x i16> %tmp1, %tmp2
+  %select = select <4 x i1> %cmp, <4 x i16> %tmp2, <4 x i16> %tmp1
+  %res = bitcast <4 x i16> %select to i64
+  ret i64 %res
+}