diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -75,7 +75,8 @@ return selectSHXADD_UWOp(N, ShAmt, Val); } - bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const; + bool hasAllNBitUsers(SDNode *Node, unsigned Bits, + const unsigned Depth = 0) const; bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); } bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2291,9 +2291,12 @@ Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || Node->getOpcode() == ISD::SIGN_EXTEND_INREG || - isa<ConstantSDNode>(Node)) && + isa<ConstantSDNode>(Node) || Depth != 0) && "Unexpected opcode"); + if (Depth >= SelectionDAG::MaxRecursionDepth) + return false; + for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { SDNode *User = *UI; // Users of this node should have already been instruction selected @@ -2353,15 +2356,25 @@ return false; break; case RISCV::ANDI: - if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1)))) - return false; - break; + if (Bits >= (64 - countLeadingZeros(User->getConstantOperandVal(1)))) + break; + goto RecCheck; case RISCV::ORI: { uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue(); - if (Bits < (64 - countLeadingOnes(Imm))) + if (Bits >= (64 - countLeadingOnes(Imm))) + break; + [[fallthrough]]; + } + case RISCV::AND: + case RISCV::OR: + case RISCV::XOR: + case RISCV::ANDN: + case RISCV::ORN: + case RISCV::XNOR: + RecCheck: + if (!hasAllNBitUsers(User, Bits, Depth + 1)) return false; break; - } case RISCV::SEXT_B: case RISCV::PACKH: if (Bits < 8) diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -29,20 +29,6 @@ ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: jalr zero, 0(ra) -; -; RV32C-LABEL: add_small_const: -; RV32C: # %bb.0: -; RV32C-NEXT: c.addi a0, 1 -; RV32C-NEXT: c.slli a0, 24 -; RV32C-NEXT: c.srai a0, 24 -; RV32C-NEXT: c.jr ra -; -; RV64C-LABEL: add_small_const: -; RV64C: # %bb.0: -; RV64C-NEXT: c.addiw a0, 1 -; RV64C-NEXT: c.slli a0, 56 -; RV64C-NEXT: c.srai a0, 56 -; RV64C-NEXT: c.jr ra %1 = add i32 %a, 1 %2 = shl i32 %1, 24 %3 = ashr i32 %2, 24 @@ -66,23 +52,6 @@ ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: jalr zero, 0(ra) -; -; RV32C-LABEL: add_large_const: -; RV32C: # %bb.0: -; RV32C-NEXT: c.slli a0, 16 -; RV32C-NEXT: lui a1, 65520 -; RV32C-NEXT: c.add a0, a1 -; RV32C-NEXT: c.srai a0, 16 -; RV32C-NEXT: c.jr ra -; -; RV64C-LABEL: add_large_const: -; RV64C: # %bb.0: -; RV64C-NEXT: c.lui a1, 1 -; RV64C-NEXT: c.addiw a1, -1 -; RV64C-NEXT: c.addw a0, a1 -; RV64C-NEXT: c.slli a0, 48 -; RV64C-NEXT: c.srai a0, 48 -; RV64C-NEXT: c.jr ra %1 = add i32 %a, 4095 %2 = shl i32 %1, 16 %3 = ashr i32 %2, 16 @@ -106,23 +75,6 @@ ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: jalr zero, 0(ra) -; -; RV32C-LABEL: add_huge_const: -; RV32C: # %bb.0: -; RV32C-NEXT: c.slli a0, 16 -; RV32C-NEXT: lui a1, 524272 -; RV32C-NEXT: c.add a0, a1 -; RV32C-NEXT: c.srai a0, 16 -; RV32C-NEXT: c.jr ra -; 
RV64C-LABEL: add_huge_const: -; RV64C: # %bb.0: -; RV64C-NEXT: c.lui a1, 8 -; RV64C-NEXT: c.addiw a1, -1 -; RV64C-NEXT: c.addw a0, a1 -; RV64C-NEXT: c.slli a0, 48 -; RV64C-NEXT: c.srai a0, 48 -; RV64C-NEXT: c.jr ra %1 = add i32 %a, 32767 %2 = shl i32 %1, 16 %3 = ashr i32 %2, 16 @@ -143,20 +95,6 @@ ; RV64I-NEXT: slli a0, a0, 52 ; RV64I-NEXT: srai a0, a0, 40 ; RV64I-NEXT: jalr zero, 0(ra) -; -; RV32C-LABEL: add_non_machine_type: -; RV32C: # %bb.0: -; RV32C-NEXT: addi a0, a0, 256 -; RV32C-NEXT: c.slli a0, 20 -; RV32C-NEXT: c.srai a0, 8 -; RV32C-NEXT: c.jr ra -; -; RV64C-LABEL: add_non_machine_type: -; RV64C: # %bb.0: -; RV64C-NEXT: addiw a0, a0, 256 -; RV64C-NEXT: c.slli a0, 52 -; RV64C-NEXT: c.srai a0, 40 -; RV64C-NEXT: c.jr ra %1 = add i24 %a, 256 %2 = shl i24 %1, 12 ret i24 %2 @@ -197,42 +135,10 @@ ; RV64I-NEXT: slli a2, a2, 51 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: jalr zero, 0(ra) -; -; RV32C-LABEL: add_wide_operand: -; RV32C: # %bb.0: -; RV32C-NEXT: lw a6, 4(a1) -; RV32C-NEXT: c.lw a3, 12(a1) -; RV32C-NEXT: c.lw a4, 0(a1) -; RV32C-NEXT: c.lw a1, 8(a1) -; RV32C-NEXT: c.lui a5, 16 -; RV32C-NEXT: c.add a3, a5 -; RV32C-NEXT: c.slli a3, 3 -; RV32C-NEXT: srli a5, a1, 29 -; RV32C-NEXT: c.or a3, a5 -; RV32C-NEXT: srli a5, a4, 29 -; RV32C-NEXT: slli a2, a6, 3 -; RV32C-NEXT: c.or a2, a5 -; RV32C-NEXT: srli a5, a6, 29 -; RV32C-NEXT: c.slli a1, 3 -; RV32C-NEXT: c.or a1, a5 -; RV32C-NEXT: c.slli a4, 3 -; RV32C-NEXT: c.sw a4, 0(a0) -; RV32C-NEXT: c.sw a1, 8(a0) -; RV32C-NEXT: c.sw a2, 4(a0) -; RV32C-NEXT: c.sw a3, 12(a0) -; RV32C-NEXT: c.jr ra -; -; RV64C-LABEL: add_wide_operand: -; RV64C: # %bb.0: -; RV64C-NEXT: srli a2, a0, 61 -; RV64C-NEXT: c.slli a1, 3 -; RV64C-NEXT: c.or a1, a2 -; RV64C-NEXT: c.slli a0, 3 -; RV64C-NEXT: c.li a2, 1 -; RV64C-NEXT: c.slli a2, 51 -; RV64C-NEXT: c.add a1, a2 -; RV64C-NEXT: c.jr ra %1 = add i128 %a, 5192296858534827628530496329220096 %2 = shl i128 %1, 3 ret i128 %2 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32C: {{.*}} +; RV64C: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -118,8 +118,8 @@ ; RV64I-NEXT: xor a4, a7, a4 ; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: xor a2, a3, a2 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: xor a1, a3, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret @@ -335,8 +335,8 @@ ; RV64I-NEXT: ld a1, 0(sp) ; RV64I-NEXT: ld a2, 16(sp) ; RV64I-NEXT: ld a3, 32(sp) -; RV64I-NEXT: add a5, a5, a7 -; RV64I-NEXT: add a1, a5, a1 +; RV64I-NEXT: add a4, a5, a7 +; RV64I-NEXT: add a1, a4, a1 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: add a1, a1, a3 ; RV64I-NEXT: add a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -82,9 +82,9 @@ ; RV32-NEXT: mulhu a6, a5, a4 ; RV32-NEXT: add a3, a6, a3 ; RV32-NEXT: sltu a0, a0, a2 -; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: mul a0, a0, a4 +; RV32-NEXT: add a1, a3, a0 ; RV32-NEXT: mul a0, a5, a4 ; RV32-NEXT: ret ; @@ -339,10 +339,10 @@ ; RV32-NEXT: lui a1, 449390 ; RV32-NEXT: addi a1, a1, -1171 ; RV32-NEXT: mulh a1, a0, a1 -; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: srli a0, a1, 31 -; RV32-NEXT: srai a1, a1, 2 -; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: srai a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: sdiv_constant_sub_srai: @@ -352,10 +352,10 @@ ; RV64-NEXT: addiw a2, a2, -1171 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: subw a1, a1, a0 -; RV64-NEXT: srliw a0, a1, 31 -; RV64-NEXT: sraiw a1, a1, 2 -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: subw a0, a1, a0 +; RV64-NEXT: srliw a1, a0, 31 +; RV64-NEXT: sraiw a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: ret %1 = sdiv i32 %a, -7 ret i32 %1 @@ -453,10 +453,10 @@ ; RV64-NEXT: lui a1, %hi(.LCPI15_0) ; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1) ; RV64-NEXT: mulh a1, a0, a1 -; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: srli a0, a1, 63 -; RV64-NEXT: srai a1, a1, 1 -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: srli a1, a0, 63 +; RV64-NEXT: srai a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: ret %1 = sdiv i64 %a, -3 ret i64 %1 @@ -628,11 +628,11 @@ ; RV32IM-NEXT: li a2, 109 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: srli a1, a1, 8 -; RV32IM-NEXT: sub a1, a1, a0 -; RV32IM-NEXT: slli a1, a1, 24 -; RV32IM-NEXT: srli a0, a1, 31 -; RV32IM-NEXT: srai a1, a1, 26 -; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srai a0, a0, 26 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV32IMZB-LABEL: sdiv8_constant_sub_srai: @@ -641,11 +641,11 @@ ; RV32IMZB-NEXT: li a2, 109 ; RV32IMZB-NEXT: mul a1, a1, a2 ; RV32IMZB-NEXT: srli a1, a1, 8 -; RV32IMZB-NEXT: sub a1, a1, a0 -; RV32IMZB-NEXT: slli a1, a1, 24 -; RV32IMZB-NEXT: srli a0, a1, 31 -; RV32IMZB-NEXT: srai a1, a1, 26 -; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: sub a0, a1, a0 +; RV32IMZB-NEXT: slli a0, a0, 24 +; RV32IMZB-NEXT: srli 
a1, a0, 31 +; RV32IMZB-NEXT: srai a0, a0, 26 +; RV32IMZB-NEXT: add a0, a0, a1 ; RV32IMZB-NEXT: ret ; ; RV64IM-LABEL: sdiv8_constant_sub_srai: @@ -655,11 +655,11 @@ ; RV64IM-NEXT: li a2, 109 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: subw a1, a1, a0 -; RV64IM-NEXT: slli a1, a1, 56 -; RV64IM-NEXT: srli a0, a1, 63 -; RV64IM-NEXT: srai a1, a1, 58 -; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: subw a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: srli a1, a0, 63 +; RV64IM-NEXT: srai a0, a0, 58 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret ; ; RV64IMZB-LABEL: sdiv8_constant_sub_srai: @@ -668,11 +668,11 @@ ; RV64IMZB-NEXT: li a2, 109 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 8 -; RV64IMZB-NEXT: subw a1, a1, a0 -; RV64IMZB-NEXT: slli a1, a1, 56 -; RV64IMZB-NEXT: srli a0, a1, 63 -; RV64IMZB-NEXT: srai a1, a1, 58 -; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: subw a0, a1, a0 +; RV64IMZB-NEXT: slli a0, a0, 56 +; RV64IMZB-NEXT: srli a1, a0, 63 +; RV64IMZB-NEXT: srai a0, a0, 58 +; RV64IMZB-NEXT: add a0, a0, a1 ; RV64IMZB-NEXT: ret %1 = sdiv i8 %a, -7 ret i8 %1 @@ -849,11 +849,11 @@ ; RV32IM-NEXT: addi a2, a2, 1911 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: srli a1, a1, 16 -; RV32IM-NEXT: sub a1, a1, a0 -; RV32IM-NEXT: slli a1, a1, 16 -; RV32IM-NEXT: srli a0, a1, 31 -; RV32IM-NEXT: srai a1, a1, 19 -; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: slli a0, a0, 16 +; RV32IM-NEXT: srli a1, a0, 31 +; RV32IM-NEXT: srai a0, a0, 19 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV32IMZB-LABEL: sdiv16_constant_sub_srai: @@ -863,11 +863,11 @@ ; RV32IMZB-NEXT: addi a2, a2, 1911 ; RV32IMZB-NEXT: mul a1, a1, a2 ; RV32IMZB-NEXT: srli a1, a1, 16 -; RV32IMZB-NEXT: sub a1, a1, a0 -; RV32IMZB-NEXT: slli a1, a1, 16 -; RV32IMZB-NEXT: srli a0, a1, 31 -; RV32IMZB-NEXT: srai a1, a1, 19 -; RV32IMZB-NEXT: add a0, a1, a0 +; RV32IMZB-NEXT: sub a0, a1, a0 +; RV32IMZB-NEXT: slli a0, a0, 16 +; RV32IMZB-NEXT: srli a1, a0, 31 +; RV32IMZB-NEXT: srai a0, a0, 19 +; RV32IMZB-NEXT: add a0, a0, a1 ; RV32IMZB-NEXT: ret ; ; RV64IM-LABEL: sdiv16_constant_sub_srai: @@ -878,11 +878,11 @@ ; RV64IM-NEXT: addiw a2, a2, 1911 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a1, a1, 16 -; RV64IM-NEXT: subw a1, a1, a0 -; RV64IM-NEXT: slli a1, a1, 48 -; RV64IM-NEXT: srli a0, a1, 63 -; RV64IM-NEXT: srai a1, a1, 51 -; RV64IM-NEXT: add a0, a1, a0 +; RV64IM-NEXT: subw a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 48 +; RV64IM-NEXT: srli a1, a0, 63 +; RV64IM-NEXT: srai a0, a0, 51 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret ; ; RV64IMZB-LABEL: sdiv16_constant_sub_srai: @@ -892,11 +892,11 @@ ; RV64IMZB-NEXT: addiw a2, a2, 1911 ; RV64IMZB-NEXT: mul a1, a1, a2 ; RV64IMZB-NEXT: srli a1, a1, 16 -; RV64IMZB-NEXT: subw a1, a1, a0 -; RV64IMZB-NEXT: slli a1, a1, 48 -; RV64IMZB-NEXT: srli a0, a1, 63 -; RV64IMZB-NEXT: srai a1, a1, 51 -; RV64IMZB-NEXT: add a0, a1, a0 +; RV64IMZB-NEXT: subw a0, a1, a0 +; RV64IMZB-NEXT: slli a0, a0, 48 +; RV64IMZB-NEXT: srli a1, a0, 63 +; RV64IMZB-NEXT: srai a0, a0, 51 +; RV64IMZB-NEXT: add a0, a0, a1 ; RV64IMZB-NEXT: ret %1 = sdiv i16 %a, -15 ret i16 %1 diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll --- a/llvm/test/CodeGen/RISCV/div-pow2.ll +++ b/llvm/test/CodeGen/RISCV/div-pow2.ll @@ -209,9 +209,9 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 31 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a2, 
a3, a0 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: srai a1, a1, 1 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 @@ -265,9 +265,9 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 21 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a2, a3, a0 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: srai a1, a1, 11 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 @@ -322,9 +322,9 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 20 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a2, a3, a0 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: srai a1, a1, 12 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 @@ -379,9 +379,9 @@ ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a2, a3 +; RV32I-NEXT: or a2, a3, a0 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a2, a2 ; RV32I-NEXT: srai a1, a1, 16 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -197,9 +197,9 @@ ; RV32IM-NEXT: mulhu a6, a5, a4 ; RV32IM-NEXT: add a3, a6, a3 ; RV32IM-NEXT: sltu a0, a0, a2 -; RV32IM-NEXT: sub a1, a1, a0 -; RV32IM-NEXT: mul a1, a1, a4 -; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: mul a0, a0, a4 +; RV32IM-NEXT: add a1, a3, a0 ; RV32IM-NEXT: mul a0, a5, a4 ; RV32IM-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: li a1, 10 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma @@ -87,6 +87,7 @@ ; CHECK-NEXT: li a1, 10 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -178,7 +178,7 @@ ; RV64ZBB-LABEL: abs32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: neg a1, a0 ; RV64ZBB-NEXT: max a0, a0, a1 ; RV64ZBB-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) @@ -209,7 +209,7 @@ ; RV64ZBB-LABEL: select_abs32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: neg a1, a0 ; RV64ZBB-NEXT: max a0, a0, a1 ; RV64ZBB-NEXT: ret %1 = icmp slt i32 %x, 0 @@ -501,9 +501,9 @@ ; ; RV64ZBB-LABEL: zext_abs32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a1, a0 -; RV64ZBB-NEXT: max a0, a0, a1 +; 
RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: max a0, a1, a0 ; RV64ZBB-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) %zext = zext i32 %abs to i64 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -819,8 +819,8 @@ ; ; RV32IM-LABEL: muli32_p4352: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 17 -; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: lui a1, 1 +; RV32IM-NEXT: addi a1, a1, 256 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: ret ; @@ -851,8 +851,8 @@ ; ; RV32IM-LABEL: muli32_p3840: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 15 -; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: lui a1, 1 +; RV32IM-NEXT: addi a1, a1, -256 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: ret ; @@ -883,8 +883,8 @@ ; ; RV32IM-LABEL: muli32_m3840: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, -15 -; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: lui a1, 1048575 +; RV32IM-NEXT: addi a1, a1, 256 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: ret ; @@ -908,14 +908,14 @@ define i32 @muli32_m4352(i32 %a) nounwind { ; RV32I-LABEL: muli32_m4352: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, -17 -; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: lui a1, 1048575 +; RV32I-NEXT: addi a1, a1, -256 ; RV32I-NEXT: tail __mulsi3@plt ; ; RV32IM-LABEL: muli32_m4352: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, -17 -; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: lui a1, 1048575 +; RV32IM-NEXT: addi a1, a1, -256 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: ret ; @@ -923,8 +923,8 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, -17 -; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a1, 1048575 +; RV64I-NEXT: addiw a1, a1, -256 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -932,8 +932,8 @@ ; ; RV64IM-LABEL: muli32_m4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, -17 -; RV64IM-NEXT: slli a1, a1, 8 +; RV64IM-NEXT: lui a1, 1048575 +; RV64IM-NEXT: addiw a1, a1, -256 ; RV64IM-NEXT: mulw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, -4352 @@ -959,8 +959,8 @@ ; ; RV32IM-LABEL: muli64_p4352: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, 17 -; RV32IM-NEXT: slli a2, a2, 8 +; RV32IM-NEXT: lui a2, 1 +; RV32IM-NEXT: addi a2, a2, 256 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 ; RV32IM-NEXT: add a1, a3, a1 @@ -976,8 +976,8 @@ ; ; RV64IM-LABEL: muli64_p4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 17 -; RV64IM-NEXT: slli a1, a1, 8 +; RV64IM-NEXT: lui a1, 1 +; RV64IM-NEXT: addiw a1, a1, 256 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, 4352 @@ -1003,8 +1003,8 @@ ; ; RV32IM-LABEL: muli64_p3840: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, 15 -; RV32IM-NEXT: slli a2, a2, 8 +; RV32IM-NEXT: lui a2, 1 +; RV32IM-NEXT: addi a2, a2, -256 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 ; RV32IM-NEXT: add a1, a3, a1 @@ -1020,8 +1020,8 @@ ; ; RV64IM-LABEL: muli64_p3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 15 -; RV64IM-NEXT: slli a1, a1, 8 +; RV64IM-NEXT: lui a1, 1 +; RV64IM-NEXT: addiw a1, a1, -256 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, 3840 @@ -1033,8 +1033,8 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a2, -17 -; RV32I-NEXT: slli a2, a2, 8 +; RV32I-NEXT: lui a2, 1048575 +; RV32I-NEXT: addi a2, a2, -256 ; RV32I-NEXT: li a3, -1 ; RV32I-NEXT: call __muldi3@plt ; 
RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1043,8 +1043,8 @@ ; ; RV32IM-LABEL: muli64_m4352: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, -17 -; RV32IM-NEXT: slli a2, a2, 8 +; RV32IM-NEXT: lui a2, 1048575 +; RV32IM-NEXT: addi a2, a2, -256 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 ; RV32IM-NEXT: sub a3, a3, a0 @@ -1054,14 +1054,14 @@ ; ; RV64I-LABEL: muli64_m4352: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, -17 -; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a1, 1048575 +; RV64I-NEXT: addiw a1, a1, -256 ; RV64I-NEXT: tail __muldi3@plt ; ; RV64IM-LABEL: muli64_m4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, -17 -; RV64IM-NEXT: slli a1, a1, 8 +; RV64IM-NEXT: lui a1, 1048575 +; RV64IM-NEXT: addiw a1, a1, -256 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, -4352 @@ -1087,8 +1087,8 @@ ; ; RV32IM-LABEL: muli64_m3840: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, -15 -; RV32IM-NEXT: slli a2, a2, 8 +; RV32IM-NEXT: lui a2, 1048575 +; RV32IM-NEXT: addi a2, a2, 256 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mulhu a3, a0, a2 ; RV32IM-NEXT: sub a3, a3, a0 @@ -1105,8 +1105,8 @@ ; ; RV64IM-LABEL: muli64_m3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, -15 -; RV64IM-NEXT: slli a1, a1, 8 +; RV64IM-NEXT: lui a1, 1048575 +; RV64IM-NEXT: addiw a1, a1, 256 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, -3840 @@ -1149,17 +1149,17 @@ ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB30_2: -; RV32I-NEXT: sub a2, a2, a1 -; RV32I-NEXT: sltu a1, a2, t1 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a2, a2, t1 -; RV32I-NEXT: sub a5, a5, a3 -; RV32I-NEXT: sub a3, a5, a7 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sltu a2, a1, t1 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a3, a3, a7 ; RV32I-NEXT: sub a4, a6, a4 ; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m3840: @@ -1171,8 +1171,8 @@ ; RV32IM-NEXT: lw a3, 8(a1) ; RV32IM-NEXT: lw a4, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) -; RV32IM-NEXT: li a5, -15 -; RV32IM-NEXT: slli a5, a5, 8 +; RV32IM-NEXT: lui a5, 1048575 +; RV32IM-NEXT: addi a5, a5, 256 ; RV32IM-NEXT: mulhu a6, a4, a5 ; RV32IM-NEXT: mul a7, a1, a5 ; RV32IM-NEXT: add a6, a7, a6 @@ -1203,16 +1203,16 @@ ; RV32IM-NEXT: sub a3, t1, a3 ; RV32IM-NEXT: add a2, a3, a2 ; RV32IM-NEXT: sub a3, t3, a4 -; RV32IM-NEXT: sub a3, a3, a1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: add a2, a2, t0 -; RV32IM-NEXT: add a2, a7, a2 -; RV32IM-NEXT: add a2, a2, s0 -; RV32IM-NEXT: mul a1, a4, a5 -; RV32IM-NEXT: sw a1, 0(a0) +; RV32IM-NEXT: sub a1, a3, a1 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: add a1, a1, t0 +; RV32IM-NEXT: add a1, a7, a1 +; RV32IM-NEXT: add a1, a1, s0 +; RV32IM-NEXT: mul a2, a4, a5 +; RV32IM-NEXT: sw a2, 0(a0) ; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) -; RV32IM-NEXT: sw a2, 12(a0) +; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 16 @@ -1236,8 +1236,8 @@ ; ; RV64IM-LABEL: muli128_m3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a2, -15 -; RV64IM-NEXT: slli a2, a2, 8 +; RV64IM-NEXT: lui a2, 1048575 +; RV64IM-NEXT: addiw a2, a2, 256 ; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: mulhu a3, a0, a2 ; RV64IM-NEXT: sub a3, a3, a0 @@ -1280,8 +1280,8 @@ ; RV32I-NEXT: sub a7, t2, t0 ; RV32I-NEXT: sub 
a3, a3, a6 ; RV32I-NEXT: sub a3, a3, a4 -; RV32I-NEXT: sub a2, a2, a1 -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a7, 8(a0) ; RV32I-NEXT: sw a5, 12(a0) @@ -1327,19 +1327,19 @@ ; RV32IM-NEXT: slli t1, a2, 6 ; RV32IM-NEXT: sub a2, a2, t1 ; RV32IM-NEXT: mulhu a5, a1, a5 -; RV32IM-NEXT: sub a5, a5, a1 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: sub a1, t3, a3 -; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: sub a1, a5, a1 ; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: sub a2, t3, a3 +; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: add a1, a2, a1 ; RV32IM-NEXT: neg a2, t5 ; RV32IM-NEXT: sltu a2, a2, t0 ; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 ; RV32IM-NEXT: slli a2, a3, 6 -; RV32IM-NEXT: sub a3, a3, a2 -; RV32IM-NEXT: sw a3, 0(a0) +; RV32IM-NEXT: sub a2, a3, a2 +; RV32IM-NEXT: sw a2, 0(a0) ; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) @@ -1412,8 +1412,8 @@ ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add s5, a0, s5 -; RV32I-NEXT: sltu a0, s5, a0 +; RV32I-NEXT: add a2, a0, s5 +; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add s8, s7, a0 ; RV32I-NEXT: mv a0, s0 @@ -1436,18 +1436,18 @@ ; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add s2, a0, s2 -; RV32I-NEXT: add a2, s9, s2 -; RV32I-NEXT: sltu a3, a2, s9 -; RV32I-NEXT: sltu a4, s9, s5 -; RV32I-NEXT: sltu a5, s8, s7 -; RV32I-NEXT: add a5, s6, a5 -; RV32I-NEXT: add a4, a5, a4 +; RV32I-NEXT: add a3, a0, s2 +; RV32I-NEXT: add a2, s9, a3 +; RV32I-NEXT: sltu a4, a2, s9 +; RV32I-NEXT: sltu a5, s9, s5 +; RV32I-NEXT: sltu a6, s8, s7 +; RV32I-NEXT: add a6, s6, a6 +; RV32I-NEXT: add a5, a6, a5 ; RV32I-NEXT: add a1, a1, s3 -; RV32I-NEXT: sltu a0, s2, a0 +; RV32I-NEXT: sltu a0, a3, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: add a0, a4, a0 -; RV32I-NEXT: add a1, a0, a3 +; RV32I-NEXT: add a0, a5, a0 +; RV32I-NEXT: add a1, a0, a4 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -1594,29 +1594,29 @@ ; RV32I-LABEL: muladd_demand_2: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: ori a0, a1, -16 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ori a0, a0, -16 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muladd_demand_2: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: sub a1, a1, a0 -; RV32IM-NEXT: ori a0, a1, -16 +; RV32IM-NEXT: sub a0, a1, a0 +; RV32IM-NEXT: ori a0, a0, -16 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muladd_demand_2: ; RV64I: # %bb.0: ; RV64I-NEXT: slliw a0, a0, 1 -; RV64I-NEXT: subw a1, a1, a0 -; RV64I-NEXT: ori a0, a1, -16 +; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: ori a0, a0, -16 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muladd_demand_2: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slliw a0, a0, 1 -; RV64IM-NEXT: subw a1, a1, a0 -; RV64IM-NEXT: ori a0, a1, -16 +; RV64IM-NEXT: subw a0, a1, a0 +; RV64IM-NEXT: ori a0, a0, -16 ; RV64IM-NEXT: ret %m = mul i8 %x, 14 %a = add i8 %y, %m diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -187,7 +187,7 @@ ; RV64ZBB-LABEL: neg_abs32_multiuse: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a2, a0 +; RV64ZBB-NEXT: neg a2, a0 ; RV64ZBB-NEXT: max a2, a0, a2 ; 
RV64ZBB-NEXT: negw a0, a2 ; RV64ZBB-NEXT: sw a2, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/pr58511.ll b/llvm/test/CodeGen/RISCV/pr58511.ll --- a/llvm/test/CodeGen/RISCV/pr58511.ll +++ b/llvm/test/CodeGen/RISCV/pr58511.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: subw a1, a1, a3 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: li a3, 1 -; CHECK-NEXT: slli a3, a3, 11 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: addiw a3, a3, -2048 ; CHECK-NEXT: or a0, a0, a3 ; CHECK-NEXT: sw a1, 0(a2) ; CHECK-NEXT: ret @@ -29,9 +29,9 @@ ; CHECK-NEXT: slliw a1, a1, 12 ; CHECK-NEXT: subw a1, a1, a3 ; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: li a3, 1 -; CHECK-NEXT: slli a3, a3, 11 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: addiw a3, a3, -2048 ; CHECK-NEXT: or a0, a0, a3 ; CHECK-NEXT: sw a1, 0(a2) ; CHECK-NEXT: ret @@ -68,9 +68,9 @@ ; CHECK-NEXT: slliw a3, a1, 11 ; CHECK-NEXT: slliw a1, a1, 12 ; CHECK-NEXT: subw a1, a1, a3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: li a3, 1 -; CHECK-NEXT: slli a3, a3, 11 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: addiw a3, a3, -2048 ; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: sw a1, 0(a2) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll --- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 24 * vlenb ; CHECK-NEXT: li a0, 55 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vloxseg2ei32.v v8, (a0), v8 @@ -64,7 +64,7 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.wv v16, v8, v24 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -75,9 +75,9 @@ ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfdiv.vv v8, v16, v8, v0.t ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb @@ -86,6 +86,7 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; @@ -100,7 +101,7 @@ ; SUBREGLIVENESS-NEXT: csrr a0, vlenb ; SUBREGLIVENESS-NEXT: slli a0, a0, 4 ; SUBREGLIVENESS-NEXT: sub sp, sp, a0 -; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb +; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb ; SUBREGLIVENESS-NEXT: li a0, 55 
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v8, (a0), v8 @@ -135,7 +136,7 @@ ; SUBREGLIVENESS-NEXT: add a1, a1, a2 ; SUBREGLIVENESS-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; SUBREGLIVENESS-NEXT: addi a1, sp, 16 -; SUBREGLIVENESS-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; SUBREGLIVENESS-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload ; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v20 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; SUBREGLIVENESS-NEXT: vssubu.vv v16, v16, v8, v0.t @@ -147,6 +148,7 @@ ; SUBREGLIVENESS-NEXT: add sp, sp, a0 ; SUBREGLIVENESS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; SUBREGLIVENESS-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; SUBREGLIVENESS-NEXT: .cfi_def_cfa_offset 32 ; SUBREGLIVENESS-NEXT: addi sp, sp, 32 ; SUBREGLIVENESS-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll --- a/llvm/test/CodeGen/RISCV/rem.ll +++ b/llvm/test/CodeGen/RISCV/rem.ll @@ -125,7 +125,7 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: sraiw a1, a0, 31 ; RV64I-NEXT: srliw a1, a1, 29 -; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: addw a1, a0, a1 ; RV64I-NEXT: andi a1, a1, -8 ; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret @@ -134,7 +134,7 @@ ; RV64IM: # %bb.0: ; RV64IM-NEXT: sraiw a1, a0, 31 ; RV64IM-NEXT: srliw a1, a1, 29 -; RV64IM-NEXT: add a1, a0, a1 +; RV64IM-NEXT: addw a1, a0, a1 ; RV64IM-NEXT: andi a1, a1, -8 ; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret @@ -167,7 +167,7 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: sraiw a1, a0, 31 ; RV64I-NEXT: srliw a1, a1, 16 -; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: addw a1, a0, a1 ; RV64I-NEXT: lui a2, 1048560 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: subw a0, a0, a1 @@ -177,7 +177,7 @@ ; RV64IM: # %bb.0: ; RV64IM-NEXT: sraiw a1, a0, 31 ; RV64IM-NEXT: srliw a1, a1, 16 -; RV64IM-NEXT: add a1, a0, a1 +; RV64IM-NEXT: addw a1, a0, a1 ; RV64IM-NEXT: lui a2, 1048560 ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: subw a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll @@ -24,16 +24,11 @@ } define i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind { -; RV32I-LABEL: pack_i32_2: -; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: ret -; -; RV32ZBKB-LABEL: pack_i32_2: -; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: pack a0, a0, a1 -; RV32ZBKB-NEXT: ret +; CHECK-LABEL: pack_i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 16 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %zexta = zext i16 %a to i32 %zextb = zext i16 %b to i32 %shl1 = shl i32 %zextb, 16 @@ -42,18 +37,12 @@ } define i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 %2) { -; RV32I-LABEL: pack_i32_3: -; RV32I: # %bb.0: -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: add a0, a0, a2 -; RV32I-NEXT: ret -; -; RV32ZBKB-LABEL: pack_i32_3: -; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: pack a0, a1, a0 -; RV32ZBKB-NEXT: add a0, a0, a2 -; RV32ZBKB-NEXT: ret +; CHECK-LABEL: pack_i32_3: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 16 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: ret %4 = zext i16 %0 to i32 %5 = shl nuw i32 %4, 16 %6 = zext i16 %1 to i32 @@ -149,9 +138,9 @@ ; RV32I-LABEL: packh_i64: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a0, a0, 255 -; RV32I-NEXT: slli a2, a2, 24 -; RV32I-NEXT: srli a2, a2, 16 -; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: slli a1, a2, 24 
+; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret ; @@ -191,16 +180,11 @@ define zeroext i16 @packh_i16(i8 zeroext %a, i8 zeroext %b) nounwind { -; RV32I-LABEL: packh_i16: -; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: ret -; -; RV32ZBKB-LABEL: packh_i16: -; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: packh a0, a0, a1 -; RV32ZBKB-NEXT: ret +; CHECK-LABEL: packh_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 8 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %zext = zext i8 %a to i16 %zext1 = zext i8 %b to i16 %shl = shl i16 %zext1, 8 @@ -210,20 +194,14 @@ define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) { -; RV32I-LABEL: packh_i16_2: -; RV32I: # %bb.0: -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: srli a0, a0, 16 -; RV32I-NEXT: ret -; -; RV32ZBKB-LABEL: packh_i16_2: -; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: add a0, a1, a0 -; RV32ZBKB-NEXT: packh a0, a2, a0 -; RV32ZBKB-NEXT: ret +; CHECK-LABEL: packh_i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 8 +; CHECK-NEXT: or a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 16 +; CHECK-NEXT: srli a0, a0, 16 +; CHECK-NEXT: ret %4 = add i8 %1, %0 %5 = zext i8 %4 to i16 %6 = shl i16 %5, 8 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll @@ -279,7 +279,7 @@ ; RV64I-LABEL: rori_i32_fshl_nosext: ; RV64I: # %bb.0: ; RV64I-NEXT: srliw a2, a0, 1 -; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: slliw a0, a0, 31 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: sw a0, 0(a1) ; RV64I-NEXT: ret @@ -314,7 +314,7 @@ define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind { ; RV64I-LABEL: rori_i32_fshr_nosext: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slliw a2, a0, 1 ; RV64I-NEXT: srliw a0, a0, 31 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: sw a0, 0(a1) @@ -352,7 +352,7 @@ define i64 @roriw_bug(i64 %x) nounwind { ; CHECK-LABEL: roriw_bug: ; CHECK: # %bb.0: -; CHECK-NEXT: slli a1, a0, 31 +; CHECK-NEXT: slliw a1, a0, 31 ; CHECK-NEXT: andi a2, a0, -2 ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -220,7 +220,7 @@ ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: xori a0, a0, 31 ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -232,7 +232,7 @@ ; RV64ZBB-NEXT: clzw a1, a0 ; RV64ZBB-NEXT: xori a1, a1, 31 ; RV64ZBB-NEXT: snez a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; RV64ZBB-NEXT: or a0, a0, a1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true) @@ -436,7 +436,7 @@ ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -447,7 +447,7 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a1, a0 ; RV64ZBB-NEXT: snez a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; 
RV64ZBB-NEXT: or a0, a0, a1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) @@ -475,7 +475,7 @@ ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: addi a0, a0, 1 ; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: addiw a1, a1, -1 ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -487,7 +487,7 @@ ; RV64ZBB-NEXT: ctzw a1, a0 ; RV64ZBB-NEXT: addi a1, a1, 1 ; RV64ZBB-NEXT: seqz a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: addiw a0, a0, -1 ; RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) @@ -870,7 +870,7 @@ ; RV64ZBB-LABEL: abs_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: neg a1, a0 ; RV64ZBB-NEXT: max a0, a0, a1 ; RV64ZBB-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) @@ -952,10 +952,11 @@ ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: slli a2, a2, 8 +; RV64I-NEXT: srliw a2, a0, 24 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 @@ -978,11 +979,12 @@ ; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: addiw a3, a3, -256 ; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: and a3, a0, a3 -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slliw a3, a0, 8 +; RV64I-NEXT: lui a4, 4080 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: slliw a0, a0, 24 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: sw a0, 0(a1) @@ -1004,31 +1006,34 @@ define i64 @bswap_i64(i64 %a) { ; RV64I-LABEL: bswap_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: slli a1, a0, 24 +; RV64I-NEXT: li a2, 255 +; RV64I-NEXT: slli a3, a2, 40 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 ; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: slli a2, a2, 48 +; RV64I-NEXT: and a2, a3, a2 +; RV64I-NEXT: slli a3, a0, 56 +; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a2, a0, 40 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: addiw a3, a3, -256 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: srli a3, a0, 24 ; RV64I-NEXT: lui a4, 4080 ; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: srli a5, a0, 8 -; RV64I-NEXT: srliw a5, a5, 24 -; RV64I-NEXT: slli a5, a5, 24 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: or a1, a3, a1 -; RV64I-NEXT: and a4, a0, a4 -; RV64I-NEXT: slli a4, a4, 24 -; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: slli a3, a3, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: slli a2, a2, 40 -; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: bswap_i64: diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll 
b/llvm/test/CodeGen/RISCV/rv64zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -32,7 +32,8 @@ ; ; RV64ZBKB-LABEL: pack_i32_2: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: slliw a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 ; RV64ZBKB-NEXT: ret %zexta = zext i16 %a to i32 %zextb = zext i16 %b to i32 @@ -45,14 +46,15 @@ define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) { ; RV64I-LABEL: pack_i32_3: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: slliw a0, a0, 16 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: addw a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64ZBKB-LABEL: pack_i32_3: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: packw a0, a1, a0 +; RV64ZBKB-NEXT: slliw a0, a0, 16 +; RV64ZBKB-NEXT: or a0, a0, a1 ; RV64ZBKB-NEXT: addw a0, a0, a2 ; RV64ZBKB-NEXT: ret %4 = zext i16 %0 to i32 @@ -115,7 +117,8 @@ ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: lw a0, 0(a0) ; RV64ZBKB-NEXT: lwu a1, 0(a1) -; RV64ZBKB-NEXT: pack a0, a1, a0 +; RV64ZBKB-NEXT: slli a0, a0, 32 +; RV64ZBKB-NEXT: or a0, a0, a1 ; RV64ZBKB-NEXT: ret %3 = load i32, ptr %0, align 4 %4 = zext i32 %3 to i64 @@ -215,7 +218,8 @@ ; ; RV64ZBKB-LABEL: packh_i16: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: slli a1, a1, 8 +; RV64ZBKB-NEXT: or a0, a1, a0 ; RV64ZBKB-NEXT: ret %zext = zext i8 %a to i16 %zext1 = zext i8 %b to i16 @@ -227,8 +231,8 @@ define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) { ; RV64I-LABEL: packh_i16_2: ; RV64I: # %bb.0: -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: addw a0, a1, a0 +; RV64I-NEXT: slliw a0, a0, 8 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 @@ -237,7 +241,10 @@ ; RV64ZBKB-LABEL: packh_i16_2: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: addw a0, a1, a0 -; RV64ZBKB-NEXT: packh a0, a2, a0 +; RV64ZBKB-NEXT: slliw a0, a0, 8 +; RV64ZBKB-NEXT: or a0, a0, a2 +; RV64ZBKB-NEXT: slli a0, a0, 48 +; RV64ZBKB-NEXT: srli a0, a0, 48 ; RV64ZBKB-NEXT: ret %4 = add i8 %1, %0 %5 = zext i8 %4 to i16 @@ -252,14 +259,14 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addw a0, a1, a0 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: slli a2, a2, 32 -; RV64I-NEXT: srli a2, a2, 32 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: slli a1, a2, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBKB-LABEL: pack_i64_allWUsers: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: addw a0, a1, a0 +; RV64ZBKB-NEXT: add a0, a1, a0 ; RV64ZBKB-NEXT: pack a0, a2, a0 ; RV64ZBKB-NEXT: ret %4 = add i32 %1, %0 @@ -281,7 +288,8 @@ ; RV64ZBKB-LABEL: pack_i32_allWUsers: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: addw a0, a1, a0 -; RV64ZBKB-NEXT: packw a0, a2, a0 +; RV64ZBKB-NEXT: slliw a0, a0, 16 +; RV64ZBKB-NEXT: or a0, a0, a2 ; RV64ZBKB-NEXT: ret %4 = add i16 %1, %0 %5 = zext i16 %4 to i32 diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll --- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -36,7 +36,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb ; 
RV64IV-NEXT: addi a0, sp, 8 ; RV64IV-NEXT: vl1re64.v v8, (a0) ; RV64IV-NEXT: addi a0, sp, 528 @@ -47,6 +47,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: .cfi_def_cfa_offset 528 ; RV64IV-NEXT: addi sp, sp, 528 ; RV64IV-NEXT: ret %local = alloca i64 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -579,8 +579,9 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -616,8 +617,9 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -658,8 +660,9 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -695,8 +698,9 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -737,8 +741,9 @@ ; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vsll.vi v12, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v12, v12, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -774,8 +779,9 @@ ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsrl.vi v12, v8, 24 ; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 +; RV64-NEXT: vsll.vi v12, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -816,8 +822,9 @@ ; RV32-NEXT: vand.vx v12, v12, a0 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vsll.vi v16, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -853,8 +860,9 @@ ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsrl.vi v16, v8, 24 ; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 +; RV64-NEXT: vsll.vi v16, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, 
v12 @@ -895,8 +903,9 @@ ; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vsll.vi v24, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v24, v24, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -932,8 +941,9 @@ ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsrl.vi v24, v8, 24 ; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vsll.vi v24, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v24, v24, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 @@ -972,58 +982,66 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v9, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v10, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a1 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v11, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: vsrl.vi v11, v8, 24 +; RV32-NEXT: vand.vx v11, v11, a0 ; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v11 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v10, v12, v10 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vlse64.v v11, (a1), zero ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a0 -; RV32-NEXT: vand.vx v12, v8, a2 -; RV32-NEXT: vsll.vx v12, v12, a1 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v12, v8, a3 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vsll.vi v13, v8, 8 +; RV32-NEXT: vand.vv v12, v13, v12 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vlse64.v v11, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vlse64.v v11, (a1), zero ; RV32-NEXT: vor.vv v8, v10, v8 ; 
RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vand.vv v9, v9, v11 ; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 @@ -1046,22 +1064,25 @@ ; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 ; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vsrl.vi v11, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v11, v11, a3 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vand.vx v10, v8, a3 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: vand.vx v11, v8, a4 -; RV64-NEXT: vsll.vi v11, v11, 8 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v11, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vsll.vx v11, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: lui a0, %hi(.LCPI18_0) ; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) @@ -1100,58 +1121,66 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a1 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v14, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vsrl.vi v14, v8, 24 +; RV32-NEXT: vand.vx v14, v14, a0 ; RV32-NEXT: vsrl.vi v16, v8, 8 -; 
RV32-NEXT: vand.vv v16, v16, v14 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v12, v16, v12 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vlse64.v v14, (a1), zero ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a0 -; RV32-NEXT: vand.vx v16, v8, a2 -; RV32-NEXT: vsll.vx v16, v16, a1 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vsll.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v16, v8, a3 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vsll.vi v18, v8, 8 +; RV32-NEXT: vand.vv v16, v18, v16 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v14, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vlse64.v v14, (a1), zero ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vand.vv v10, v10, v14 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1174,22 +1203,25 @@ ; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 ; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vsrl.vi v14, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v14, v14, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v14, v14, a3 ; RV64-NEXT: vor.vv v12, v14, v12 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vand.vx v12, v8, a3 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: vand.vx v14, v8, a4 -; RV64-NEXT: vsll.vi v14, v14, 8 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vi v12, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v14, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vor.vv v12, v14, v12 ; RV64-NEXT: vsll.vx v14, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: lui a0, %hi(.LCPI19_0) ; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) @@ -1228,58 +1260,66 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; 
RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a1 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v20, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vand.vx v20, v20, a0 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v20 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v20, (a1), zero ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: vsll.vx v24, v24, a1 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a4 -; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vsll.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v24, v8, a3 +; RV32-NEXT: vand.vv v20, v24, v20 +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v20, (a1), zero +; RV32-NEXT: vsll.vi v28, v8, 8 +; RV32-NEXT: vand.vv v24, v28, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vlse64.v v20, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vlse64.v v20, (a1), zero ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vand.vv v12, v12, v20 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 @@ -1302,22 +1342,25 @@ ; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 ; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vsrl.vi v20, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v20, v20, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v20, v20, a3 ; RV64-NEXT: vor.vv v16, v20, v16 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vand.vx v16, v8, a3 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: vand.vx v20, v8, a4 -; RV64-NEXT: vsll.vi v20, v20, 8 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vi v16, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsll.vi v20, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vor.vv v16, v20, v16 ; RV64-NEXT: vsll.vx v20, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 
+; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: lui a0, %hi(.LCPI20_0) ; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) @@ -1354,71 +1397,96 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v0, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a1 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vx v0, v8, a2 ; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a2 -; RV32-NEXT: vsll.vx v0, v0, a1 -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vor.vv v0, v16, v0 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli 
a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsll.vx v24, v8, a3 +; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v24, v8, a2 +; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsll.vi v24, v8, 8 +; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vor.vv v8, v0, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 1 @@ -1427,8 +1495,9 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1444,22 +1513,25 @@ ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 ; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsrl.vi v0, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v0, v0, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v0, v0, a3 ; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vand.vx v24, v8, a3 -; RV64-NEXT: vsll.vi v24, v24, 24 -; RV64-NEXT: vand.vx v0, v8, a4 -; RV64-NEXT: vsll.vi v0, v0, 8 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vi v24, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsll.vi v0, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vsll.vx v0, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: lui a0, %hi(.LCPI21_0) ; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -90,8 +90,9 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 +; 
RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -106,8 +107,9 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -127,8 +129,9 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v10, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -143,8 +146,9 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -164,8 +168,9 @@ ; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vsll.vi v12, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v12, v12, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -180,8 +185,9 @@ ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsrl.vi v12, v8, 24 ; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 8 +; RV64-NEXT: vsll.vi v12, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -201,8 +207,9 @@ ; RV32-NEXT: vand.vx v12, v12, a0 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vsll.vi v16, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -217,8 +224,9 @@ ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsrl.vi v16, v8, 24 ; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 8 +; RV64-NEXT: vsll.vi v16, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 @@ -238,8 +246,9 @@ ; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vsll.vi v24, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v24, v24, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -254,8 +263,9 @@ ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsrl.vi v24, v8, 24 ; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vsll.vi v24, v8, 8 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v24, v24, a0 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, 
v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 @@ -273,33 +283,41 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v9, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v10, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a1 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v11, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: vsrl.vi v11, v8, 24 +; RV32-NEXT: vand.vx v11, v11, a0 ; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v11 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v10, v12, v10 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vlse64.v v11, (a1), zero ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a0 -; RV32-NEXT: vand.vx v12, v8, a2 -; RV32-NEXT: vsll.vx v12, v12, a1 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v12, v8, a3 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vsll.vi v13, v8, 8 +; RV32-NEXT: vand.vv v12, v13, v12 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 @@ -317,22 +335,25 @@ ; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 ; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vsrl.vi v11, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v11, v11, a3 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vand.vx v10, v8, a3 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: vand.vx v11, v8, a4 -; RV64-NEXT: vsll.vi v11, v11, 8 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vi v10, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v11, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vsll.vx v11, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -350,33 +371,41 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a0 -; 
RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a1 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v14, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vsrl.vi v14, v8, 24 +; RV32-NEXT: vand.vx v14, v14, a0 ; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v14 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v12, v16, v12 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vlse64.v v14, (a1), zero ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a0 -; RV32-NEXT: vand.vx v16, v8, a2 -; RV32-NEXT: vsll.vx v16, v16, a1 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vsll.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v16, v8, a3 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vsll.vi v18, v8, 8 +; RV32-NEXT: vand.vv v16, v18, v16 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 @@ -394,22 +423,25 @@ ; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 ; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vsrl.vi v14, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v14, v14, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v14, v14, a3 ; RV64-NEXT: vor.vv v12, v14, v12 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vand.vx v12, v8, a3 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: vand.vx v14, v8, a4 -; RV64-NEXT: vsll.vi v14, v14, 8 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vi v12, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v14, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vor.vv v12, v14, v12 ; RV64-NEXT: vsll.vx v14, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -427,33 +459,41 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 
255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a1 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v20, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vand.vx v20, v20, a0 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v20 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v20, (a1), zero ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: vsll.vx v24, v24, a1 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a3 -; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vsll.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v24, v8, a3 +; RV32-NEXT: vand.vv v20, v24, v20 +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v20, (a1), zero +; RV32-NEXT: vsll.vi v28, v8, 8 +; RV32-NEXT: vand.vv v24, v28, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 @@ -471,22 +511,25 @@ ; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 ; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vsrl.vi v20, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v20, v20, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v20, v20, a3 ; RV64-NEXT: vor.vv v16, v20, v16 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vand.vx v16, v8, a3 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: vand.vx v20, v8, a4 -; RV64-NEXT: vsll.vi v20, v20, 8 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vi v16, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsll.vi v20, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vor.vv v16, v20, v16 ; RV64-NEXT: vsll.vx v20, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 @@ -502,51 +545,77 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: li a0, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v24, 
v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v0, v0, a3 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: li a1, 255 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -256 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a1 +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vx v0, v8, a2 ; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a2 -; RV32-NEXT: vsll.vx v0, v0, a1 -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsll.vx v0, v8, a3 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsll.vx v0, v8, a2 +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: vsll.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -562,22 +631,25 @@ ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 ; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsrl.vi v0, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: 
slli a4, a4, 24 -; RV64-NEXT: vand.vx v0, v0, a4 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: slli a3, a2, 24 +; RV64-NEXT: vand.vx v0, v0, a3 ; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vand.vx v24, v8, a3 -; RV64-NEXT: vsll.vi v24, v24, 24 -; RV64-NEXT: vand.vx v0, v8, a4 -; RV64-NEXT: vsll.vi v0, v0, 8 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vi v24, v8, 8 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsll.vi v0, v8, 24 +; RV64-NEXT: slli a3, a2, 40 +; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vsll.vx v0, v8, a0 -; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: slli a0, a2, 48 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -82,10 +82,10 @@ ; CHECK-NEXT: vl8re32.v v24, (a1) ; CHECK-NEXT: vl8re32.v v0, (a5) ; CHECK-NEXT: vs8r.v v16, (a0) -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v24, (a2) -; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vs8r.v v0, (a4) +; CHECK-NEXT: add a1, a0, a2 +; CHECK-NEXT: vs8r.v v24, (a1) +; CHECK-NEXT: add a1, a0, a4 +; CHECK-NEXT: vs8r.v v0, (a1) ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: ret @@ -102,7 +102,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * VLENB ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a3, a2, 3 ; CHECK-NEXT: add a4, a1, a3 @@ -149,41 +149,42 @@ ; CHECK-NEXT: vl8re32.v v16, (t3) ; CHECK-NEXT: vl8re32.v v24, (t2) ; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: add t1, a0, t1 -; CHECK-NEXT: vs8r.v v24, (t1) -; CHECK-NEXT: add a7, a0, a7 -; CHECK-NEXT: vs8r.v v0, (a7) -; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vs8r.v v8, (a6) -; CHECK-NEXT: add a5, a0, a5 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vs8r.v v8, (a5) -; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vs8r.v v8, (a4) +; CHECK-NEXT: add a1, a0, a2 +; CHECK-NEXT: vs8r.v v16, (a1) +; CHECK-NEXT: add a1, a0, t1 +; CHECK-NEXT: vs8r.v v24, (a1) +; CHECK-NEXT: add a1, a0, a7 +; CHECK-NEXT: vs8r.v v0, (a1) +; CHECK-NEXT: add a1, a0, a6 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a1) +; CHECK-NEXT: add a1, a0, a5 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a1) +; CHECK-NEXT: add a1, a0, a4 
+; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a1) ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load , * %x @@ -238,7 +239,7 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -261,10 +262,10 @@ ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: vadd.vv v24, v0, v16 ; CHECK-NEXT: vadd.vx v16, v8, a4 @@ -272,6 +273,7 @@ ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %r = add %x, %y @@ -411,14 +413,14 @@ ; RV32-NEXT: addi a2, a2, 128 ; RV32-NEXT: li a5, 42 ; RV32-NEXT: addi a3, sp, 128 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vs8r.v v8, (a1) ; RV32-NEXT: vmv8r.v v8, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 128 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: call ext3@plt ; RV32-NEXT: addi sp, s0, -144 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload @@ -479,14 +481,14 @@ ; RV64-NEXT: addi a2, a2, 128 ; RV64-NEXT: li a5, 42 ; RV64-NEXT: addi a3, sp, 128 -; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vs8r.v v8, (a1) ; RV64-NEXT: vmv8r.v v8, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: call ext3@plt ; RV64-NEXT: addi sp, s0, -144 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -732,103 +732,11 @@ declare @llvm.vp.ceil.nxv16f64(, , i32) define @vp_ceil_vv_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv16f64: -; CHECK: # %bb.0: -; 
CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v0, a2 -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_ceil_vv_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret %head = insertelement poison, i1 
true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ceil.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -487,10 +487,8 @@ define void @store_extractelt_nxv8f64(* %x, double* %p) { ; CHECK-LABEL: store_extractelt_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: fld ft0, 8(a0) +; CHECK-NEXT: fsd ft0, 0(a1) ; CHECK-NEXT: ret %a = load , * %x %b = extractelement %a, i64 1 @@ -641,15 +639,15 @@ ; RV64-NEXT: add a3, a0, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: slliw a2, a2, 1 +; RV64-NEXT: addiw a2, a2, -1 ; RV64-NEXT: vs8r.v v16, (a3) ; RV64-NEXT: bltu a2, a1, .LBB52_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: .LBB52_2: -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: slli a1, a2, 3 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: fld fa0, 0(a0) ; RV64-NEXT: addi sp, s0, -80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload @@ -710,8 +708,8 @@ ; RV64-LABEL: extractelt_nxv16f64_idx: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: slliw a2, a1, 1 +; RV64-NEXT: addiw a2, a2, -1 ; RV64-NEXT: bltu a0, a2, .LBB54_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -151,18 +151,18 @@ ; RV32-NEXT: add a4, a0, a2 ; RV32-NEXT: vl8r.v v16, (a4) ; RV32-NEXT: vl8r.v v24, (a0) -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-NEXT: add a0, a3, a1 +; RV32-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; RV32-NEXT: vmseq.vi v8, v16, 0 ; RV32-NEXT: vmseq.vi v0, v24, 0 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 ; RV32-NEXT: vs8r.v v24, (a3) -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a1, a3, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 -; RV32-NEXT: vs8r.v v8, (a2) -; RV32-NEXT: lb a0, 0(a1) +; RV32-NEXT: vs8r.v v8, (a1) +; RV32-NEXT: lb a0, 0(a0) ; RV32-NEXT: addi sp, s0, -80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -172,8 +172,8 @@ ; RV64-LABEL: extractelt_nxv128i1: ; RV64: # %bb.0: ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 4 -; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: slliw a3, a2, 4 +; RV64-NEXT: addiw a3, a3, -1 ; RV64-NEXT: bltu a1, a3, .LBB7_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a3 @@ -191,18 +191,18 @@ ; RV64-NEXT: add a4, a0, a2 ; RV64-NEXT: vl8r.v v16, (a4) ; RV64-NEXT: vl8r.v v24, (a0) -; RV64-NEXT: add a1, a3, a1 -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-NEXT: add a0, a3, a1 +; RV64-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; RV64-NEXT: vmseq.vi v8, v16, 0 ; RV64-NEXT: vmseq.vi v0, v24, 0 ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 ; RV64-NEXT: vs8r.v v24, (a3) -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a1, a3, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 -; RV64-NEXT: vs8r.v v8, (a2) -; RV64-NEXT: lb a0, 0(a1) +; RV64-NEXT: vs8r.v 
v8, (a1) +; RV64-NEXT: lb a0, 0(a0) ; RV64-NEXT: addi sp, s0, -80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -823,15 +823,15 @@ ; CHECK-NEXT: add a3, a0, a1 ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a1, a1, 32 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: slliw a2, a2, 1 +; CHECK-NEXT: addiw a2, a2, -1 ; CHECK-NEXT: vs8r.v v16, (a3) ; CHECK-NEXT: bltu a2, a1, .LBB72_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB72_2: -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: ld a0, 0(a0) ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload @@ -857,8 +857,8 @@ ; CHECK-LABEL: extractelt_nxv16i64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: slliw a2, a1, 1 +; CHECK-NEXT: addiw a2, a2, -1 ; CHECK-NEXT: bltu a0, a2, .LBB74_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -92,44 +92,6 @@ declare <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double>, <32 x i1>, i32) define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vfptrunc_v32f32_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -185,12 +185,12 @@ ; CHECK-NEXT: li a4, 1024 ; CHECK-NEXT: .LBB3_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lbu a5, 0(a1) -; CHECK-NEXT: add a6, a0, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a6) -; CHECK-NEXT: vadd.vx v8, v8, a5 -; CHECK-NEXT: vse8.v v8, (a6) +; CHECK-NEXT: vlse8.v v8, (a1), zero +; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: vle8.v v9, (a5) +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vse8.v v8, (a5) ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a1, a1, 160 ; CHECK-NEXT: bne a2, a4, .LBB3_1 @@ -221,63 +221,24 @@ } define void @gather_zero_stride_unfold(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) { -; V-LABEL: gather_zero_stride_unfold: -; V: # %bb.0: # %entry -; V-NEXT: li a2, 0 -; V-NEXT: li a3, 32 -; V-NEXT: li a4, 1024 -; V-NEXT: .LBB4_1: # %vector.body -; V-NEXT: # =>This Inner Loop Header: Depth=1 -; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; V-NEXT: vlse8.v v8, (a1), zero -; V-NEXT: add a5, a0, a2 -; V-NEXT: vle8.v v9, (a5) -; V-NEXT: vdivu.vv v8, v8, v9 -; V-NEXT: vse8.v v8, (a5) -; V-NEXT: addi a2, a2, 32 -; V-NEXT: addi a1, a1, 160 -; V-NEXT: bne a2, a4, .LBB4_1 -; V-NEXT: # %bb.2: # %for.cond.cleanup -; V-NEXT: ret -; -; ZVE32F-LABEL: gather_zero_stride_unfold: -; ZVE32F: # %bb.0: # %entry -; ZVE32F-NEXT: li a2, 0 -; ZVE32F-NEXT: li a3, 32 -; ZVE32F-NEXT: li a4, 1024 -; ZVE32F-NEXT: .LBB4_1: # %vector.body -; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; ZVE32F-NEXT: vlse8.v v8, (a1), zero -; ZVE32F-NEXT: add a5, a0, a2 -; ZVE32F-NEXT: vle8.v v9, (a5) -; ZVE32F-NEXT: vdivu.vv v8, v8, v9 -; ZVE32F-NEXT: vse8.v v8, (a5) -; ZVE32F-NEXT: addi a2, a2, 32 -; ZVE32F-NEXT: addi a1, a1, 160 -; ZVE32F-NEXT: bne a2, a4, .LBB4_1 -; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup -; ZVE32F-NEXT: ret -; -; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold: -; NOT-OPTIMIZED: # %bb.0: # %entry -; NOT-OPTIMIZED-NEXT: li a2, 0 -; NOT-OPTIMIZED-NEXT: li a3, 32 -; NOT-OPTIMIZED-NEXT: li a4, 1024 -; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body -; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1 -; NOT-OPTIMIZED-NEXT: lbu a5, 0(a1) -; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; NOT-OPTIMIZED-NEXT: add a6, a0, a2 -; NOT-OPTIMIZED-NEXT: vle8.v v8, (a6) -; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a5 -; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8 -; NOT-OPTIMIZED-NEXT: vse8.v v8, (a6) -; NOT-OPTIMIZED-NEXT: addi a2, a2, 32 -; NOT-OPTIMIZED-NEXT: addi a1, a1, 160 -; NOT-OPTIMIZED-NEXT: bne a2, a4, .LBB4_1 -; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup -; NOT-OPTIMIZED-NEXT: ret +; CHECK-LABEL: gather_zero_stride_unfold: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: .LBB4_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; CHECK-NEXT: vlse8.v v8, (a1), zero +; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: vle8.v v9, (a5) +; CHECK-NEXT: vdivu.vv v8, v8, v9 +; CHECK-NEXT: vse8.v v8, (a5) +; CHECK-NEXT: addi a2, a2, 32 +; CHECK-NEXT: addi a1, a1, 160 +; CHECK-NEXT: bne a2, a4, .LBB4_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret entry: br label %vector.body @@ -792,18 +753,18 @@ ; ZVE32F-NEXT: li a4, 1024 ; ZVE32F-NEXT: li a5, 40 ; ZVE32F-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: ld a6, 8(a1) 
-; ZVE32F-NEXT: ld a7, 0(a1) -; ZVE32F-NEXT: ld t0, 24(a1) -; ZVE32F-NEXT: ld t1, 16(a1) -; ZVE32F-NEXT: mul t2, a3, a5 -; ZVE32F-NEXT: add t2, a0, t2 -; ZVE32F-NEXT: mul t3, a2, a5 -; ZVE32F-NEXT: add t3, a0, t3 -; ZVE32F-NEXT: sd a7, 0(t3) -; ZVE32F-NEXT: sd a6, 0(t2) -; ZVE32F-NEXT: sd t1, 80(t3) -; ZVE32F-NEXT: sd t0, 80(t2) +; ZVE32F-NEXT: mul a6, a3, a5 +; ZVE32F-NEXT: add a6, a0, a6 +; ZVE32F-NEXT: mul a7, a2, a5 +; ZVE32F-NEXT: add a7, a0, a7 +; ZVE32F-NEXT: ld t0, 0(a1) +; ZVE32F-NEXT: ld t1, 8(a1) +; ZVE32F-NEXT: ld t2, 16(a1) +; ZVE32F-NEXT: ld t3, 24(a1) +; ZVE32F-NEXT: sd t0, 0(a7) +; ZVE32F-NEXT: sd t1, 0(a6) +; ZVE32F-NEXT: sd t2, 80(a7) +; ZVE32F-NEXT: sd t3, 80(a6) ; ZVE32F-NEXT: addi a2, a2, 4 ; ZVE32F-NEXT: addi a3, a3, 4 ; ZVE32F-NEXT: addi a4, a4, -4 @@ -847,15 +808,15 @@ ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: beq a2, a3, .LBB13_7 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 1023 -; CHECK-NEXT: subw a4, a4, a2 +; CHECK-NEXT: li a3, 1023 +; CHECK-NEXT: subw a4, a3, a2 ; CHECK-NEXT: li a5, 31 ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: bltu a4, a5, .LBB13_5 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: slli a4, a4, 32 -; CHECK-NEXT: srli a4, a4, 32 -; CHECK-NEXT: addi a4, a4, 1 +; CHECK-NEXT: slli a3, a4, 32 +; CHECK-NEXT: srli a3, a3, 32 +; CHECK-NEXT: addi a4, a3, 1 ; CHECK-NEXT: andi a5, a4, -32 ; CHECK-NEXT: add a3, a5, a2 ; CHECK-NEXT: slli a6, a2, 2 @@ -878,20 +839,20 @@ ; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: beq a4, a5, .LBB13_7 ; CHECK-NEXT: .LBB13_5: -; CHECK-NEXT: addiw a2, a3, -1024 -; CHECK-NEXT: add a0, a0, a3 -; CHECK-NEXT: slli a4, a3, 2 -; CHECK-NEXT: add a3, a4, a3 -; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB13_6: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lb a3, 0(a1) -; CHECK-NEXT: lb a4, 0(a0) -; CHECK-NEXT: addw a3, a4, a3 -; CHECK-NEXT: sb a3, 0(a0) -; CHECK-NEXT: addiw a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: lb a4, 0(a1) +; CHECK-NEXT: add a5, a0, a3 +; CHECK-NEXT: lb a6, 0(a5) +; CHECK-NEXT: addw a4, a6, a4 +; CHECK-NEXT: sb a4, 0(a5) +; CHECK-NEXT: addiw a4, a3, 1 +; CHECK-NEXT: addi a3, a3, 1 ; CHECK-NEXT: addi a1, a1, 5 -; CHECK-NEXT: bnez a2, .LBB13_6 +; CHECK-NEXT: bne a4, a2, .LBB13_6 ; CHECK-NEXT: .LBB13_7: ; CHECK-NEXT: ret %4 = icmp eq i32 %2, 1024 @@ -1008,3 +969,5 @@ bb16: ; preds = %bb4, %bb ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; NOT-OPTIMIZED: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -51,44 +51,6 @@ declare <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i7_v128i16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a1, a0, -64 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 64 -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t -; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v16, v8, a1 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) ret <128 x i7> %v } @@ -236,262 +198,6 @@ declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i32_v128i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v3, v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v2, v0, 4 -; CHECK-NEXT: vslidedown.vi v27, v3, 4 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: addi a3, a7, -64 -; CHECK-NEXT: sltu a4, a7, a3 -; CHECK-NEXT: addi a4, a4, -1 -; 
CHECK-NEXT: and a4, a4, a3 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vle64.v v16, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a2, a2, a6 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: addi a5, a1, 128 -; CHECK-NEXT: bltu a3, a2, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v4, v2, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: li a3, 64 -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a7, a3, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a7, 64 -; CHECK-NEXT: .LBB16_4: -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a5, a7, -32 -; CHECK-NEXT: sltu a6, a7, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: addi a6, a5, -16 -; CHECK-NEXT: sltu t0, a5, a6 -; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: and a6, t0, a6 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a5, a2, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a5, 16 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: addi a6, a1, 384 -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 40 -; CHECK-NEXT: mul a5, a5, t0 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v4, v3, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a6) -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: mv a1, a4 -; CHECK-NEXT: bltu a4, a2, .LBB16_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v2, v1, 2 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t -; CHECK-NEXT: addi a1, a4, -16 -; CHECK-NEXT: sltu a4, a4, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a7, a3, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a7, 32 -; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: addi a1, a7, -16 -; CHECK-NEXT: sltu a4, a7, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a7, a2, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v24, v16, 16 -; CHECK-NEXT: vse32.v v24, (a0) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi 
a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a0, a0, 384 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } @@ -499,44 +205,6 @@ declare <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v32i32_v32i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB17_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl) ret <32 x i32> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -659,136 +659,11 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; 
CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_ceil_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -659,136 +659,11 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -263,7 +263,7 @@ ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV32-V128-NEXT: lui a0, 
%hi(.LCPI10_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV32-V128-NEXT: li a1, 32 @@ -290,12 +290,12 @@ ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV32-V128-NEXT: li a0, -1 ; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 @@ -304,6 +304,7 @@ ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 +; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret ; @@ -314,7 +315,7 @@ ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV64-V128-NEXT: li a1, 32 @@ -341,12 +342,12 @@ ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 ; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 @@ -355,6 +356,7 @@ ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 +; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -369,7 +369,7 @@ ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV32-V128-NEXT: lui a0, %hi(.LCPI15_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV32-V128-NEXT: li a1, 32 @@ -396,12 +396,12 @@ ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; 
RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV32-V128-NEXT: li a0, -1 ; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 @@ -410,6 +410,7 @@ ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 +; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret ; @@ -420,7 +421,7 @@ ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * VLENB ; RV64-V128-NEXT: lui a0, %hi(.LCPI15_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV64-V128-NEXT: li a1, 32 @@ -447,12 +448,12 @@ ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 ; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 @@ -461,6 +462,7 @@ ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 +; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -607,117 +607,11 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v2, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -659,136 +659,11 @@ declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -659,136 +659,11 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, 
v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; 
CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -659,136 +659,11 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v1, v0, 2 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> 
@vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vmset.m v1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v2, v1 -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -319,80 +319,11 @@ declare <32 x double> @llvm.vp.copysign.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32) define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.copysign.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v0 -; CHECK-NEXT: addi a0, a2, -16 -; CHECK-NEXT: sltu a1, a2, a0 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v16, v16, v24 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.copysign.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -319,80 +319,11 @@ declare <32 x double> @llvm.vp.maxnum.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32) define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmax_vv_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; 
CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfmax_vv_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v0 -; CHECK-NEXT: addi a0, a2, -16 -; CHECK-NEXT: sltu a1, a2, a0 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v16, v24 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -319,80 +319,11 @@ declare <32 x double> @llvm.vp.minnum.v32f64(<32 x double>, <32 x double>, <32 x i1>, i32) define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, 
a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) ret <32 x double> %v } define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_v32f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v0 -; CHECK-NEXT: addi a0, a2, -16 -; CHECK-NEXT: sltu a1, a2, a0 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v16, v24 -; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll @@ -93,7 +93,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -107,11 +107,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x half>, <64 x half>* %x @@ -194,7 +195,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -206,11 +207,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v8, v16, v24 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x float>, <32 x float>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll @@ -93,7 +93,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -107,11 +107,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwmul.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x half>, <64 x half>* %x @@ -194,7 +195,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -206,11 +207,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwmul.vv v8, v16, v24 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x float>, <32 x float>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll @@ -93,7 +93,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -107,11 +107,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwsub.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x half>, <64 x half>* %x @@ -194,7 +195,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -206,11 +207,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwsub.vv v8, v16, v24 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x float>, <32 x float>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -149,46 +149,6 @@ declare <256 x i8> @llvm.vp.select.v256i8(<256 x i1>, <256 x i8>, <256 x i8>, i32) define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v256i8: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v2, v8 -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: addi a0, a3, -128 -; CHECK-NEXT: sltu a4, a3, a0 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: and a0, a4, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: bltu a3, a2, .LBB11_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 128 -; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr 
a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 %evl) ret <256 x i8> %v } @@ -202,7 +162,7 @@ ; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * VLENB ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) @@ -228,7 +188,7 @@ ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 @@ -236,15 +196,16 @@ ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 129) @@ -398,54 +359,6 @@ declare <32 x i64> @llvm.vp.select.v32i64(<32 x i1>, <32 x i64>, <32 x i64>, i32) define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v32i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a0, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 %evl) ret <32 x i64> %v } @@ -566,54 +479,6 @@ declare <64 x float> @llvm.vp.select.v64f32(<64 x i1>, <64 x float>, <64 x float>, i32) define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v64f32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, a2, -32 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a3, .LBB35_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <64 x float> @llvm.vp.select.v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %c, i32 %evl) ret <64 x float> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -277,7 +277,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) @@ -291,11 +291,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwmul.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: 
add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <128 x i8>, <128 x i8>* %x @@ -314,7 +315,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -328,11 +329,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwmul.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x i16>, <64 x i16>* %x @@ -351,7 +353,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -363,11 +365,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vwmul.vv v8, v16, v24 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x i32>, <32 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -269,7 +269,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) @@ -283,11 +283,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwmulsu.vv v8, v24, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v16, v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <128 x i8>, <128 x i8>* %x @@ -306,7 +307,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -320,11 +321,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwmulsu.vv v8, v24, v16 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v16, v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x i16>, <64 x i16>* %x @@ -343,7 +345,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -355,11 +357,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vwmulsu.vv v8, v24, v16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v16, v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x i32>, <32 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll @@ -253,7 +253,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) @@ -267,11 +267,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwmulu.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <128 x i8>, <128 x i8>* %x @@ -290,7 +291,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, 
e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -304,11 +305,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwmulu.vv v8, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <64 x i16>, <64 x i16>* %x @@ -327,7 +329,7 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * VLENB ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -339,11 +341,13 @@ ; CHECK-NEXT: vslidedown.vi v0, v24, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vwmulu.vv v8, v16, v24 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x i32>, <32 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -732,103 +732,11 @@ declare @llvm.vp.floor.nxv16f64(, , i32) define @vp_floor_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v0, a2 -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, 
v1 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_floor_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_floor_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.floor.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -10,7 +10,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * VLENB ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: add a0, sp, a0 ; RV64IV-NEXT: addi a0, a0, 16 @@ -21,6 +21,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: .cfi_def_cfa_offset 16 ; RV64IV-NEXT: addi sp, sp, 16 ; RV64IV-NEXT: ret %local0 = alloca @@ -38,7 +39,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * VLENB ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: add a0, sp, a0 ; RV64IV-NEXT: addi a0, a0, 16 @@ -48,6 +49,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: 
.cfi_def_cfa_offset 16 ; RV64IV-NEXT: addi sp, sp, 16 ; RV64IV-NEXT: ret %local0 = alloca @@ -65,7 +67,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * VLENB ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add a0, sp, a0 @@ -76,6 +78,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: .cfi_def_cfa_offset 16 ; RV64IV-NEXT: addi sp, sp, 16 ; RV64IV-NEXT: ret %local0 = alloca @@ -161,7 +164,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * VLENB ; RV64IV-NEXT: lw a0, 12(sp) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 @@ -174,6 +177,7 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: .cfi_def_cfa_offset 16 ; RV64IV-NEXT: addi sp, sp, 16 ; RV64IV-NEXT: ret %local_scalar0 = alloca i32 diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -732,103 +732,11 @@ declare @llvm.vp.round.nxv16f64(, , i32) define @vp_round_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v0, a2 -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: 
vmflt.vf v1, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_round_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -732,103 +732,11 @@ declare @llvm.vp.roundeven.nxv16f64(, , i32) define @vp_roundeven_nxv16f64( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v0, a2 -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a1, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f64( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv16f64_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: lui a3, %hi(.LCPI33_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, ft0 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv16f64( %va, %m, i32 %evl)