diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
 
 define void @add_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
 ; CHECK-LABEL: add_v16i8:
@@ -943,58 +943,58 @@
 }
 
 define void @mulhu_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI55_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI55_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI55_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI55_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 1035469
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -819
-; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT: lui a1, 1026731
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vmv.v.i v26, 2
-; LMULMAX1-RV64-NEXT: addi a1, zero, 1
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI55_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI55_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulhu.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI55_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI55_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 1035469
+; RV64-NEXT: addiw a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -819
+; RV64-NEXT: vmv.v.x v26, a1
+; RV64-NEXT: lui a1, 1026731
+; RV64-NEXT: addiw a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmulhu.vv v25, v25, v26
+; RV64-NEXT: vmv.v.i v26, 2
+; RV64-NEXT: addi a1, zero, 1
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vsrl.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = udiv <2 x i64> %a,
 store <2 x i64> %b, <2 x i64>* %x
@@ -1043,33 +1043,33 @@
 }
 
 define void @mulhs_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI58_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI58_0)
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI58_0)
-; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI58_0)
-; LMULMAX1-RV64-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI58_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vmulh.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v26, v25, 31
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, %hi(.LCPI58_0)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; RV64-NEXT: vle32.v v26, (a1)
+; RV64-NEXT: vmulh.vv v25, v25, v26
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 31
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <4 x i32>, <4 x i32>* %x
 %b = sdiv <4 x i32> %a,
 store <4 x i32> %b, <4 x i32>* %x
@@ -1077,76 +1077,76 @@
 }
 
 define void @mulhs_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI59_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmul.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, 349525
-; LMULMAX1-RV32-NEXT: addi a2, a1, 1365
-; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI59_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI59_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: addi a1, zero, 1
-; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0
-; LMULMAX1-RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu
-; LMULMAX1-RV32-NEXT: vslideup.vi v28, v27, 2
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsra.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v26, zero
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmul.vv v26, v25, v26
-; LMULMAX1-RV64-NEXT: lui a1, 21845
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a2, a1, 1365
-; LMULMAX1-RV64-NEXT: vmv.v.x v27, a2
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: addi a1, zero, 63
-; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vid.v v27
-; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI59_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmul.vv v26, v25, v26
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a2, a1, 1365
+; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v27, a2
+; RV32-NEXT: addi a1, a1, 1366
+; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
+; RV32-NEXT: vmv.s.x v27, a1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulh.vv v25, v25, v27
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI59_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI59_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v26, v25, v26
+; RV32-NEXT: addi a1, zero, 1
+; RV32-NEXT: vsetvli a2, zero, e32,m1,ta,mu
+; RV32-NEXT: vmv.s.x v27, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu
+; RV32-NEXT: vslideup.vi v28, v27, 2
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v25, v25, v28
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vmv.v.i v26, -1
+; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, zero
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmul.vv v26, v25, v26
+; RV64-NEXT: lui a1, 21845
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a2, a1, 1365
+; RV64-NEXT: vmv.v.x v27, a2
+; RV64-NEXT: addi a1, a1, 1366
+; RV64-NEXT: vsetvli a2, zero, e64,m1,ta,mu
+; RV64-NEXT: vmv.s.x v27, a1
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vmulh.vv v25, v25, v27
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: addi a1, zero, 63
+; RV64-NEXT: vsrl.vx v26, v25, a1
+; RV64-NEXT: vid.v v27
+; RV64-NEXT: vsra.vv v25, v25, v27
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = sdiv <2 x i64> %a,
 store <2 x i64> %b, <2 x i64>* %x
@@ -3841,37 +3841,21 @@
 ; LMULMAX2-NEXT: vse64.v v26, (a0)
 ; LMULMAX2-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: extract_v4i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: addi a2, a0, 16
-; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle64.v v27, (a1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, 16
-; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v28
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: extract_v4i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: addi a2, a0, 16
-; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle64.v v27, (a1)
-; LMULMAX1-RV64-NEXT: addi a1, a1, 16
-; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: extract_v4i64:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vle64.v v25, (a0)
+; LMULMAX1-NEXT: addi a2, a0, 16
+; LMULMAX1-NEXT: vle64.v v26, (a2)
+; LMULMAX1-NEXT: vle64.v v27, (a1)
+; LMULMAX1-NEXT: addi a1, a1, 16
+; LMULMAX1-NEXT: vle64.v v28, (a1)
+; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT: vadd.vv v26, v26, v28
+; LMULMAX1-NEXT: vadd.vv v25, v25, v27
+; LMULMAX1-NEXT: vse64.v v25, (a0)
+; LMULMAX1-NEXT: vse64.v v26, (a2)
+; LMULMAX1-NEXT: ret
 %a = load <4 x i64>, <4 x i64>* %x
 %b = load <4 x i64>, <4 x i64>* %y
 br label %"compute"
@@ -3908,35 +3892,20 @@
 ; LMULMAX2-NEXT: vse8.v v26, (a0)
 ; LMULMAX2-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: mulhu_v32i8:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI129_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI129_0)
-; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v32i8:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI129_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI129_0)
-; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhu_v32i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle8.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI129_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI129_0)
+; LMULMAX1-NEXT: vle8.v v26, (a2)
+; LMULMAX1-NEXT: vle8.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: ret
 %a = load <32 x i8>, <32 x i8>* %x
 %b = udiv <32 x i8> %a,
 store <32 x i8> %b, <32 x i8>* %x
@@ -3969,35 +3938,20 @@
 ; LMULMAX2-NEXT: vse16.v v26, (a0)
 ; LMULMAX2-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: mulhu_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI130_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI130_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhu_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI130_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI130_0)
+; LMULMAX1-NEXT: vle16.v v26, (a2)
+; LMULMAX1-NEXT: vle16.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: ret
 %a = load <16 x i16>, <16 x i16>* %x
 %b = udiv <16 x i16> %a,
 store <16 x i16> %b, <16 x i16>* %x
@@ -4086,6 +4040,63 @@
 }
 
 define void @mulhu_v4i64(<4 x i64>* %x) {
+; LMULMAX2-RV32-LABEL: mulhu_v4i64:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI132_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI132_0)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulhu.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, 524288
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v30, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
+; LMULMAX2-RV32-NEXT: vsetivli a1, 6, e32,m2,tu,mu
+; LMULMAX2-RV32-NEXT: vslideup.vi v8, v30, 5
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v8
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI132_1)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI132_1)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhu_v4i64:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_0)
+; LMULMAX2-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v26, v28
+; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: addi a1, zero, -1
+; LMULMAX2-RV64-NEXT: slli a1, a1, 63
+; LMULMAX2-RV64-NEXT: vsetvli a2, zero, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.s.x v30, a1
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.v.i v8, 0
+; LMULMAX2-RV64-NEXT: vsetivli a1, 3, e64,m2,tu,mu
+; LMULMAX2-RV64-NEXT: vslideup.vi v8, v30, 2
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI132_1)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI132_1)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
 ; LMULMAX1-RV32-LABEL: mulhu_v4i64:
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
@@ -4203,35 +4214,20 @@
 ; LMULMAX2-NEXT: vse8.v v26, (a0)
 ; LMULMAX2-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: mulhs_v32i8:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI133_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI133_0)
-; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v32i8:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI133_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI133_0)
-; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhs_v32i8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle8.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI133_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI133_0)
+; LMULMAX1-NEXT: vle8.v v26, (a2)
+; LMULMAX1-NEXT: vle8.v v27, (a0)
+; LMULMAX1-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-NEXT: vse8.v v26, (a0)
+; LMULMAX1-NEXT: vse8.v v25, (a1)
+; LMULMAX1-NEXT: ret
 %a = load <32 x i8>, <32 x i8>* %x
 %b = udiv <32 x i8> %a,
 store <32 x i8> %b, <32 x i8>* %x
@@ -4253,35 +4249,20 @@
 ; LMULMAX2-NEXT: vse16.v v26, (a0)
 ; LMULMAX2-NEXT: ret
 ;
-; LMULMAX1-RV32-LABEL: mulhs_v16i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI134_0)
-; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI134_0)
-; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV32-NEXT: vdiv.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vdiv.vv v26, v27, v26
-; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_v16i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI134_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI134_0)
-; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdiv.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
-; LMULMAX1-RV64-NEXT: ret
+; LMULMAX1-LABEL: mulhs_v16i16:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-NEXT: addi a1, a0, 16
+; LMULMAX1-NEXT: vle16.v v25, (a1)
+; LMULMAX1-NEXT: lui a2, %hi(.LCPI134_0)
+; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI134_0)
+; LMULMAX1-NEXT: vle16.v v26, (a2)
+; LMULMAX1-NEXT: vle16.v v27, (a0)
+; LMULMAX1-NEXT: vdiv.vv v25, v25, v26
+; LMULMAX1-NEXT: vdiv.vv v26, v27, v26
+; LMULMAX1-NEXT: vse16.v v26, (a0)
+; LMULMAX1-NEXT: vse16.v v25, (a1)
+; LMULMAX1-NEXT: ret
 %a = load <16 x i16>, <16 x i16>* %x
 %b = sdiv <16 x i16> %a,
 store <16 x i16> %b, <16 x i16>* %x
@@ -4289,6 +4270,34 @@
 }
 
 define void @mulhs_v8i32(<8 x i32>* %x) {
+; LMULMAX2-RV32-LABEL: mulhs_v8i32:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI135_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI135_0)
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 31
+; LMULMAX2-RV32-NEXT: vsra.vi v26, v26, 1
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse32.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhs_v8i32:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle32.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI135_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI135_0)
+; LMULMAX2-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vsra.vi v26, v26, 1
+; LMULMAX2-RV64-NEXT: vsrl.vi v28, v26, 31
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vse32.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
 ; LMULMAX1-RV32-LABEL: mulhs_v8i32:
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
@@ -4331,6 +4340,62 @@
 }
 
 define void @mulhs_v4i64(<4 x i64>* %x) {
+; LMULMAX2-RV32-LABEL: mulhs_v4i64:
+; LMULMAX2-RV32: # %bb.0:
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_0)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmul.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_1)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_1)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_2)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_2)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v28
+; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_3)
+; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_3)
+; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: ret
+;
+; LMULMAX2-RV64-LABEL: mulhs_v4i64:
+; LMULMAX2-RV64: # %bb.0:
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_0)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX2-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_1)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_1)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vmul.vv v28, v26, v28
+; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_2)
+; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_2)
+; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: addi a1, zero, 63
+; LMULMAX2-RV64-NEXT: vsrl.vx v28, v26, a1
+; LMULMAX2-RV64-NEXT: vsra.vv v26, v26, v30
+; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX2-RV64-NEXT: ret
+;
 ; LMULMAX1-RV32-LABEL: mulhs_v4i64:
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
@@ -5199,24 +5264,24 @@
 }
 
 define void @add_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: add_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: add_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vadd.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: add_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vadd.vi v25, v25, -1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 -1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5274,26 +5339,26 @@
 }
 
 define void @add_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: add_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI160_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI160_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: add_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vadd.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: add_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI160_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI160_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: add_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vadd.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5450,25 +5515,25 @@
 }
 
 define void @sub_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: addi a1, zero, -1
-; LMULMAX1-RV64-NEXT: vsub.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: sub_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sub_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: addi a1, zero, -1
+; RV64-NEXT: vsub.vx v25, v25, a1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 -1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5526,26 +5591,26 @@
 }
 
 define void @sub_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: sub_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI174_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI174_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v26, v25
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: sub_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vrsub.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: sub_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI174_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI174_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v25, v26, v25
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sub_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vrsub.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5795,26 +5860,26 @@
 }
 
 define void @and_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: and_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI190_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI190_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: and_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vand.vi v25, v25, -2
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: and_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI190_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI190_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: and_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vand.vi v25, v25, -2
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 -2, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5872,26 +5937,26 @@
 }
 
 define void @and_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: and_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI194_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI194_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: and_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vand.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: and_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI194_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI194_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vand.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: and_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vand.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6045,26 +6110,26 @@
 }
 
 define void @or_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: or_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI204_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI204_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: or_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vor.vi v25, v25, -2
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: or_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI204_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI204_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: or_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vor.vi v25, v25, -2
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 -2, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6122,26 +6187,26 @@
 }
 
 define void @or_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: or_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI208_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI208_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: or_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vor.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: or_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI208_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI208_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: or_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vor.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6295,24 +6360,24 @@
 }
 
 define void @xor_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vxor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vxor.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: xor_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xor_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vxor.vi v25, v25, -1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 -1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6370,26 +6435,26 @@
 }
 
 define void @xor_iv_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: xor_iv_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI222_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI222_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vxor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: xor_iv_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vxor.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: xor_iv_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI222_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI222_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vxor.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xor_iv_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vxor.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 1, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6543,26 +6608,26 @@
 }
 
 define void @lshr_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: lshr_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI232_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI232_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: lshr_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: lshr_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI232_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI232_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lshr_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsrl.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 31, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6668,26 +6733,26 @@
 }
 
 define void @ashr_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: ashr_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI239_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI239_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsra.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ashr_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: ashr_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI239_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI239_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ashr_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsra.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 31, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6793,26 +6858,26 @@
 }
 
 define void @shl_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: shl_vi_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI246_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI246_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsll.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: shl_vi_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: vsll.vi v25, v25, 31
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: shl_vi_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI246_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI246_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsll.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: shl_vi_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: vsll.vi v25, v25, 31
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = insertelement <2 x i64> undef, i64 31, i32 0
 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -7078,33 +7143,33 @@
 }
 
 define void @mulhu_vx_v8i16(<8 x i16>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 2
-; LMULMAX1-RV32-NEXT: addi a1, a1, 1171
-; LMULMAX1-RV32-NEXT: vmulhu.vx v26, v25, a1
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 2
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1171
-; LMULMAX1-RV64-NEXT: vmulhu.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vsub.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: lui a1, 2
+; RV32-NEXT: addi a1, a1, 1171
+; RV32-NEXT: vmulhu.vx v26, v25, a1
+; RV32-NEXT: vsub.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vsrl.vi v25, v25, 2
+; RV32-NEXT: vse16.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV64-NEXT: vle16.v v25, (a0)
+; RV64-NEXT: lui a1, 2
+; RV64-NEXT: addiw a1, a1, 1171
+; RV64-NEXT: vmulhu.vx v26, v25, a1
+; RV64-NEXT: vsub.vv v25, v25, v26
+; RV64-NEXT: vsrl.vi v25, v25, 1
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vsrl.vi v25, v25, 2
+; RV64-NEXT: vse16.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <8 x i16>, <8 x i16>* %x
 %b = udiv <8 x i16> %a,
 store <8 x i16> %b, <8 x i16>* %x
@@ -7112,27 +7177,27 @@
 }
 
 define void @mulhu_vx_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 838861
-; LMULMAX1-RV32-NEXT: addi a1, a1, -819
-; LMULMAX1-RV32-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 838861
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, 838861
+; RV32-NEXT: addi a1, a1, -819
+; RV32-NEXT: vmulhu.vx v25, v25, a1
+; RV32-NEXT: vsrl.vi v25, v25, 2
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, 838861
+; RV64-NEXT: addiw a1, a1, -819
+; RV64-NEXT: vmulhu.vx v25, v25, a1
+; RV64-NEXT: vsrl.vi v25, v25, 2
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <4 x i32>, <4 x i32>* %x
 %b = udiv <4 x i32> %a,
 store <4 x i32> %b, <4 x i32>* %x
@@ -7140,41 +7205,41 @@
 }
 
 define void @mulhu_vx_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhu_vx_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI265_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI265_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI265_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI265_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhu_vx_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 1026731
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
-; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhu_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI265_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI265_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulhu.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI265_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI265_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhu_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 1026731
+; RV64-NEXT: addiw a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, -1365
+; RV64-NEXT: vmulhu.vx v25, v25, a1
+; RV64-NEXT: vsrl.vi v25, v25, 1
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = udiv <2 x i64> %a,
 store <2 x i64> %b, <2 x i64>* %x
@@ -7198,31 +7263,31 @@
 }
 
 define void @mulhs_vx_v8i16(<8 x i16>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v8i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 5
-; LMULMAX1-RV32-NEXT: addi a1, a1, -1755
-; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 15
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v8i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 5
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1755
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 15
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV32-NEXT: vle16.v v25, (a0)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, -1755
+; RV32-NEXT: vmulh.vx v25, v25, a1
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vsrl.vi v26, v25, 15
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse16.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu
+; RV64-NEXT: vle16.v v25, (a0)
+; RV64-NEXT: lui a1, 5
+; RV64-NEXT: addiw a1, a1, -1755
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 15
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse16.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <8 x i16>, <8 x i16>* %x
 %b = sdiv <8 x i16> %a,
 store <8 x i16> %b, <8 x i16>* %x
@@ -7230,31 +7295,31 @@
 }
 
 define void @mulhs_vx_v4i32(<4 x i32>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v4i32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, 629146
-; LMULMAX1-RV32-NEXT: addi a1, a1, -1639
-; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v4i32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 629146
-; LMULMAX1-RV64-NEXT: addiw a1, a1, -1639
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
-; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: lui a1, 629146
+; RV32-NEXT: addi a1, a1, -1639
+; RV32-NEXT: vmulh.vx v25, v25, a1
+; RV32-NEXT: vsrl.vi v26, v25, 31
+; RV32-NEXT: vsra.vi v25, v25, 1
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse32.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lui a1, 629146
+; RV64-NEXT: addiw a1, a1, -1639
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: vsra.vi v25, v25, 1
+; RV64-NEXT: vsrl.vi v26, v25, 31
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse32.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <4 x i32>, <4 x i32>* %x
 %b = sdiv <4 x i32> %a,
 store <4 x i32> %b, <4 x i32>* %x
@@ -7262,44 +7327,44 @@
 }
 
 define void @mulhs_vx_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: mulhs_vx_v2i64:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI269_0)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI269_0)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI269_1)
-; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI269_1)
-; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: mulhs_vx_v2i64:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, 21845
-; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV64-NEXT: slli a1, a1, 12
-; LMULMAX1-RV64-NEXT: addi a1, a1, 1366
-; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT: addi a1, zero, 63
-; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
-; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; RV32-LABEL: mulhs_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v25, (a0)
+; RV32-NEXT: lui a1, %hi(.LCPI269_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI269_0)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vmulh.vv v25, v25, v26
+; RV32-NEXT: lui a1, %hi(.LCPI269_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI269_1)
+; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV32-NEXT: vsrl.vv v26, v25, v26
+; RV32-NEXT: vadd.vv v25, v25, v26
+; RV32-NEXT: vse64.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
+; RV64-NEXT: vle64.v v25, (a0)
+; RV64-NEXT: lui a1, 21845
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1365
+; RV64-NEXT: slli a1, a1, 12
+; RV64-NEXT: addi a1, a1, 1366
+; RV64-NEXT: vmulh.vx v25, v25, a1
+; RV64-NEXT: addi a1, zero, 63
+; RV64-NEXT: vsrl.vx v26, v25, a1
+; RV64-NEXT: vadd.vv v25, v25, v26
+; RV64-NEXT: vse64.v v25, (a0)
+; RV64-NEXT: ret
 %a = load <2 x i64>, <2 x i64>* %x
 %b = sdiv <2 x i64> %a,
 store <2 x i64> %b, <2 x i64>* %x