diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -12,6 +12,44 @@
 #include "llvm/Support/MathExtras.h"
 using namespace llvm;
 
+static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC = true,
+                          bool OptSize = false) {
+  if (!HasRVC)
+    return Res.size();
+
+  int Cost = 0;
+  for (auto Instr : Res) {
+    bool Compressed;
+    switch (Instr.Opc) {
+    case RISCV::SLLI:
+    case RISCV::SRLI:
+      Compressed = true;
+      break;
+    case RISCV::ADDI:
+    case RISCV::ADDIW:
+    case RISCV::LUI:
+      Compressed = isInt<6>(Instr.Imm);
+      break;
+    default:
+      Compressed = false;
+      break;
+    }
+    // Two RVC instructions take the same space as one RVI instruction, but
+    // can take longer to execute than the single RVI instruction. Thus, we
+    // consider that two RVC instructions are slightly more costly than one
+    // RVI instruction. For longer sequences of RVC instructions the space
+    // savings can be worth it, though. The costs below try to model that.
+    if (!Compressed)
+      Cost += 100; // Baseline cost of one RVI instruction: 100%.
+    else {
+      // We prefer three RVC instructions to two RVI instructions when
+      // optimizing for size.
+      Cost += OptSize ? 60 : 70; // 60% or 70% of the baseline cost.
+    }
+  }
+  return Cost;
+}
+
 // Recursively generate a sequence for materializing an integer.
 static void generateInstSeqImpl(int64_t Val,
                                 const FeatureBitset &ActiveFeatures,
@@ -107,7 +145,7 @@
       TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
       // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size()) {
+      if (getInstSeqCost(TmpSeq) < getInstSeqCost(Res)) {
        Res = TmpSeq;
         // A 2 instruction sequence is the best we can do.
         if (Res.size() <= 2)
@@ -121,7 +159,7 @@
       TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
       // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size()) {
+      if (getInstSeqCost(TmpSeq) < getInstSeqCost(Res)) {
         Res = TmpSeq;
         // A 2 instruction sequence is the best we can do.
         if (Res.size() <= 2)
@@ -138,7 +176,7 @@
       TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
 
       // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size()) {
+      if (getInstSeqCost(TmpSeq) < getInstSeqCost(Res)) {
         Res = TmpSeq;
         // A 2 instruction sequence is the best we can do.
         if (Res.size() <= 2)
@@ -161,7 +199,7 @@
   for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
     APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
     InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
-    Cost += MatSeq.size();
+    Cost += getInstSeqCost(MatSeq);
   }
   return std::max(1, Cost);
 }
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -38,10 +38,10 @@
 ;
 ; RV64I-LABEL: add_large_const:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    lui a1, 4095
-; RV64I-NEXT:    slli a1, a1, 36
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, -1
 ; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 4095
@@ -61,10 +61,10 @@
 ;
 ; RV64I-LABEL: add_huge_const:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    lui a1, 32767
-; RV64I-NEXT:    slli a1, a1, 36
+; RV64I-NEXT:    lui a1, 8
+; RV64I-NEXT:    addiw a1, a1, -1
 ; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    ret
   %1 = add i32 %a, 32767
@@ -95,26 +95,26 @@
 define i128 @add_wide_operand(i128 %a) nounwind {
 ; RV32I-LABEL: add_wide_operand:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a2, 0(a1)
-; RV32I-NEXT:    lw a3, 4(a1)
-; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a2, 4(a1)
+; RV32I-NEXT:    lw a3, 12(a1)
+; RV32I-NEXT:    lw a4, 0(a1)
 ; RV32I-NEXT:    lw a1, 8(a1)
-; RV32I-NEXT:    srli a5, a2, 29
-; RV32I-NEXT:    slli a4, a3, 3
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    srli a3, a3, 29
-; RV32I-NEXT:    slli a5, a1, 3
-; RV32I-NEXT:    or a3, a5, a3
-; RV32I-NEXT:    srli a1, a1, 29
-; RV32I-NEXT:    slli a5, a6, 3
-; RV32I-NEXT:    or a1, a5, a1
-; RV32I-NEXT:    slli a2, a2, 3
-; RV32I-NEXT:    lui a5, 128
-; RV32I-NEXT:    add a1, a1, a5
+; RV32I-NEXT:    lui a5, 16
+; RV32I-NEXT:    add a3, a3, a5
+; RV32I-NEXT:    slli a3, a3, 3
+; RV32I-NEXT:    srli a5, a1, 29
+; RV32I-NEXT:    or a6, a3, a5
+; RV32I-NEXT:    srli a5, a4, 29
+; RV32I-NEXT:    slli a3, a2, 3
+; RV32I-NEXT:    or a3, a3, a5
+; RV32I-NEXT:    srli a2, a2, 29
+; RV32I-NEXT:    slli a1, a1, 3
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a4, 3
 ; RV32I-NEXT:    sw a2, 0(a0)
-; RV32I-NEXT:    sw a3, 8(a0)
-; RV32I-NEXT:    sw a4, 4(a0)
-; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a6, 12(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: add_wide_operand:
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -2051,24 +2051,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB35_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB35_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB35_3: # in Loop: Header=BB35_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB35_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_monotonic:
@@ -2199,24 +2199,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB36_3
+; RV32IA-NEXT:    lr.w.aq a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB36_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB36_3: # in Loop: Header=BB36_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB36_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acquire:
@@ -2347,24 +2347,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB37_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB37_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB37_3: # in Loop: Header=BB37_1 Depth=1
 ; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB37_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_release:
@@ -2495,24 +2495,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB38_3
+; RV32IA-NEXT:    lr.w.aq a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB38_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB38_3: # in Loop: Header=BB38_1 Depth=1
 ; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB38_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acq_rel:
@@ -2643,24 +2643,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB39_3
+; RV32IA-NEXT:    lr.w.aqrl a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB39_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB39_3: # in Loop: Header=BB39_1 Depth=1
 ; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB39_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_seq_cst:
@@ -2791,24 +2791,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB40_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB40_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB40_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_monotonic:
@@ -2939,24 +2939,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB41_3
+; RV32IA-NEXT:    lr.w.aq a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB41_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB41_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB41_3: # in Loop: Header=BB41_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB41_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acquire:
@@ -3087,24 +3087,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB42_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB42_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB42_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB42_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB42_3: # in Loop: Header=BB42_1 Depth=1
 ; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB42_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_release:
@@ -3235,24 +3235,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB43_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB43_3
+; RV32IA-NEXT:    lr.w.aq a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB43_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB43_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB43_3: # in Loop: Header=BB43_1 Depth=1
 ; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB43_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acq_rel:
@@ -3383,24 +3383,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB44_3
+; RV32IA-NEXT:    lr.w.aqrl a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB44_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
 ; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB44_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_seq_cst:
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -628,24 +628,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB10_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a5, a1, .LBB10_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB10_3: # in Loop: Header=BB10_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB10_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    slli a0, a0, 24
 ; RV32IA-NEXT:    srai a0, a0, 24
 ; RV32IA-NEXT:    ret
@@ -782,24 +782,24 @@
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    addi a5, zero, 24
-; RV32IA-NEXT:    sub a3, a5, a3
+; RV32IA-NEXT:    sub a3, a4, a3
+; RV32IA-NEXT:    addi a3, a3, -231
 ; RV32IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB11_3
+; RV32IA-NEXT:    lr.w a4, (a6)
+; RV32IA-NEXT:    and a5, a4, a7
+; RV32IA-NEXT:    mv a2, a4
+; RV32IA-NEXT:    sll a5, a5, a3
+; RV32IA-NEXT:    sra a5, a5, a3
+; RV32IA-NEXT:    bge a1, a5, .LBB11_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
+; RV32IA-NEXT:    xor a2, a4, a1
 ; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a2, a4, a2
 ; RV32IA-NEXT:  .LBB11_3: # in Loop: Header=BB11_1 Depth=1
 ; RV32IA-NEXT:    sc.w a2, a2, (a6)
 ; RV32IA-NEXT:    bnez a2, .LBB11_1
 ; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
+; RV32IA-NEXT:    srl a0, a4, a0
 ; RV32IA-NEXT:    slli a0, a0, 24
 ; RV32IA-NEXT:    srai a0, a0, 24
 ; RV32IA-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -79,10 +79,10 @@
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    lui a1, 205
+; RV64IM-NEXT:    lui a1, 838861
 ; RV64IM-NEXT:    addiw a1, a1, -819
-; RV64IM-NEXT:    slli a1, a1, 12
-; RV64IM-NEXT:    addi a1, a1, -819
+; RV64IM-NEXT:    slli a1, a1, 32
+; RV64IM-NEXT:    srli a1, a1, 32
 ; RV64IM-NEXT:    mul a0, a0, a1
 ; RV64IM-NEXT:    srli a0, a0, 34
 ; RV64IM-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -912,10 +912,10 @@
 define i64 @imm_zextw2() nounwind {
 ; RV64I-LABEL: imm_zextw2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a0, 171
-; RV64I-NEXT:    addiw a0, a0, -1365
-; RV64I-NEXT:    slli a0, a0, 12
-; RV64I-NEXT:    addi a0, a0, -1366
+; RV64I-NEXT:    lui a0, 873813
+; RV64I-NEXT:    addiw a0, a0, 1365
+; RV64I-NEXT:    slli a0, a0, 33
+; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64IB-LABEL: imm_zextw2:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2910,10 +2910,10 @@
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a1, a1, 4
-; RV64I-NEXT:    lui a2, 241
+; RV64I-NEXT:    lui a2, 1044721
 ; RV64I-NEXT:    addiw a2, a2, -241
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    slli a2, a2, 36
+; RV64I-NEXT:    srli a2, a2, 32
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 4
 ; RV64I-NEXT:    or a0, a0, a1
@@ -2971,10 +2971,10 @@
 ; RV64I-NEXT:    addiw a2, a2, -241
 ; RV64I-NEXT:    and a2, a0, a2
 ; RV64I-NEXT:    slli a2, a2, 4
-; RV64I-NEXT:    lui a3, 241
+; RV64I-NEXT:    lui a3, 1044721
 ; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, 240
+; RV64I-NEXT:    slli a3, a3, 36
+; RV64I-NEXT:    srli a3, a3, 32
 ; RV64I-NEXT:    and a0, a0, a3
 ; RV64I-NEXT:    srli a0, a0, 4
 ; RV64I-NEXT:    or a0, a0, a2
@@ -3214,10 +3214,10 @@
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a1, a1, 4
-; RV64I-NEXT:    lui a3, 241
+; RV64I-NEXT:    lui a3, 1044721
 ; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    slli a3, a3, 12
-; RV64I-NEXT:    addi a3, a3, 240
+; RV64I-NEXT:    slli a3, a3, 36
+; RV64I-NEXT:    srli a3, a3, 32
 ; RV64I-NEXT:    and a0, a0, a3
 ; RV64I-NEXT:    srli a0, a0, 4
 ; RV64I-NEXT:    or a0, a0, a1
@@ -3704,15 +3704,16 @@
 ; RV64I-NEXT:    and a1, a0, a1
 ; RV64I-NEXT:    slli a2, a0, 8
 ; RV64I-NEXT:    addi a3, zero, 255
-; RV64I-NEXT:    slli a4, a3, 32
-; RV64I-NEXT:    addi a4, a4, 255
-; RV64I-NEXT:    slli a4, a4, 16
-; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    addi a3, a3, 255
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    and a2, a2, a3
 ; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    addi a3, zero, -1
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    addi a3, a3, 255
 ; RV64I-NEXT:    slli a3, a3, 24
-; RV64I-NEXT:    addi a3, a3, 1
-; RV64I-NEXT:    slli a3, a3, 16
-; RV64I-NEXT:    addi a3, a3, -256
+; RV64I-NEXT:    srli a3, a3, 16
 ; RV64I-NEXT:    and a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    or a0, a2, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -227,10 +227,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v26, v26, 4
-; LMULMAX2-RV64-NEXT:    lui a1, 241
+; LMULMAX2-RV64-NEXT:    lui a1, 1044721
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 240
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 36
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v25, v25, 4
 ; LMULMAX2-RV64-NEXT:    vor.vv v25, v25, v26
@@ -238,10 +238,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v26, v26, 2
-; LMULMAX2-RV64-NEXT:    lui a1, 205
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -819
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -820
+; LMULMAX2-RV64-NEXT:    lui a1, 996147
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 34
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v25, v25, 2
 ; LMULMAX2-RV64-NEXT:    vor.vv v25, v25, v26
@@ -249,10 +249,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v26, v26, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 171
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -1365
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -1366
+; LMULMAX2-RV64-NEXT:    lui a1, 873813
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 33
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v25, v25, 1
 ; LMULMAX2-RV64-NEXT:    vor.vv v25, v25, v26
@@ -325,10 +325,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsll.vi v26, v26, 4
-; LMULMAX1-RV64-NEXT:    lui a1, 241
+; LMULMAX1-RV64-NEXT:    lui a1, 1044721
 ; LMULMAX1-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX1-RV64-NEXT:    addi a1, a1, 240
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 36
+; LMULMAX1-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v26
@@ -336,10 +336,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
 ; LMULMAX1-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsll.vi v26, v26, 2
-; LMULMAX1-RV64-NEXT:    lui a1, 205
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX1-RV64-NEXT:    addi a1, a1, -820
+; LMULMAX1-RV64-NEXT:    lui a1, 996147
+; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 34
+; LMULMAX1-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v26
@@ -347,10 +347,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
 ; LMULMAX1-RV64-NEXT:    vand.vx v26, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsll.vi v26, v26, 1
-; LMULMAX1-RV64-NEXT:    lui a1, 171
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -1365
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX1-RV64-NEXT:    addi a1, a1, -1366
+; LMULMAX1-RV64-NEXT:    lui a1, 873813
+; LMULMAX1-RV64-NEXT:    addiw a1, a1, 1365
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 33
+; LMULMAX1-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 1
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v26
@@ -1037,10 +1037,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
 ; LMULMAX2-RV64-NEXT:    vand.vx v28, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v28, v28, 4
-; LMULMAX2-RV64-NEXT:    lui a1, 241
+; LMULMAX2-RV64-NEXT:    lui a1, 1044721
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, -241
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, 240
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 36
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v26, v26, 4
 ; LMULMAX2-RV64-NEXT:    vor.vv v26, v26, v28
@@ -1048,10 +1048,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
 ; LMULMAX2-RV64-NEXT:    vand.vx v28, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v28, v28, 2
-; LMULMAX2-RV64-NEXT:    lui a1, 205
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -819
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -820
+; LMULMAX2-RV64-NEXT:    lui a1, 996147
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 34
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v26, v26, 2
 ; LMULMAX2-RV64-NEXT:    vor.vv v26, v26, v28
@@ -1059,10 +1059,10 @@
 ; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
 ; LMULMAX2-RV64-NEXT:    vand.vx v28, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsll.vi v28, v28, 1
-; LMULMAX2-RV64-NEXT:    lui a1, 171
-; LMULMAX2-RV64-NEXT:    addiw a1, a1, -1365
-; LMULMAX2-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX2-RV64-NEXT:    addi a1, a1, -1366
+; LMULMAX2-RV64-NEXT:    lui a1, 873813
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 33
+; LMULMAX2-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX2-RV64-NEXT:    vand.vx v26, v26, a1
 ; LMULMAX2-RV64-NEXT:    vsrl.vi v26, v26, 1
 ; LMULMAX2-RV64-NEXT:    vor.vv v26, v26, v28
@@ -1164,10 +1164,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw t1, a4, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v27, v25, t1
 ; LMULMAX1-RV64-NEXT:    vsll.vi v27, v27, 4
-; LMULMAX1-RV64-NEXT:    lui a5, 241
+; LMULMAX1-RV64-NEXT:    lui a5, 1044721
 ; LMULMAX1-RV64-NEXT:    addiw a5, a5, -241
-; LMULMAX1-RV64-NEXT:    slli a5, a5, 12
-; LMULMAX1-RV64-NEXT:    addi a5, a5, 240
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 36
+; LMULMAX1-RV64-NEXT:    srli a5, a5, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a5
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v27
@@ -1175,10 +1175,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw a3, a3, 819
 ; LMULMAX1-RV64-NEXT:    vand.vx v27, v25, a3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v27, v27, 2
-; LMULMAX1-RV64-NEXT:    lui a1, 205
-; LMULMAX1-RV64-NEXT:    addiw a1, a1, -819
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 12
-; LMULMAX1-RV64-NEXT:    addi a1, a1, -820
+; LMULMAX1-RV64-NEXT:    lui a1, 996147
+; LMULMAX1-RV64-NEXT:    addiw a1, a1, 819
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 34
+; LMULMAX1-RV64-NEXT:    srli a1, a1, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a1
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v27
@@ -1186,10 +1186,10 @@
 ; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
 ; LMULMAX1-RV64-NEXT:    vand.vx v27, v25, a2
 ; LMULMAX1-RV64-NEXT:    vsll.vi v27, v27, 1
-; LMULMAX1-RV64-NEXT:    lui a4, 171
-; LMULMAX1-RV64-NEXT:    addiw a4, a4, -1365
-; LMULMAX1-RV64-NEXT:    slli a4, a4, 12
-; LMULMAX1-RV64-NEXT:    addi a4, a4, -1366
+; LMULMAX1-RV64-NEXT:    lui a4, 873813
+; LMULMAX1-RV64-NEXT:    addiw a4, a4, 1365
+; LMULMAX1-RV64-NEXT:    slli a4, a4, 33
+; LMULMAX1-RV64-NEXT:    srli a4, a4, 32
 ; LMULMAX1-RV64-NEXT:    vand.vx v25, v25, a4
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v25, v25, 1
 ; LMULMAX1-RV64-NEXT:    vor.vv v25, v25, v27
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -1542,10 +1542,10 @@
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a0, a0, 655
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a0, a0, 12
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi t0, a0, 1475
-; LP64-LP64F-LP64D-FPELIM-NEXT:    lui a0, 1192
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addiw a0, a0, 381
-; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a0, a0, 12
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a6, a0, -2048
+; LP64-LP64F-LP64D-FPELIM-NEXT:    lui a0, 1046864
+; LP64-LP64F-LP64D-FPELIM-NEXT:    addiw a0, a0, 761
+; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a0, a0, 40
+; LP64-LP64F-LP64D-FPELIM-NEXT:    srli a6, a0, 29
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    lui a0, 1048248
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addiw a0, a0, 1311
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a0, a0, 12
@@ -1593,10 +1593,10 @@
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, a0, 655
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a0, a0, 12
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi t0, a0, 1475
-; LP64-LP64F-LP64D-WITHFP-NEXT:    lui a0, 1192
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a0, a0, 381
-; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a0, a0, 12
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a6, a0, -2048
+; LP64-LP64F-LP64D-WITHFP-NEXT:    lui a0, 1046864
+; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a0, a0, 761
+; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a0, a0, 40
+; LP64-LP64F-LP64D-WITHFP-NEXT:    srli a6, a0, 29
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    lui a0, 1048248
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a0, a0, 1311
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a0, a0, 12
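Note (illustrative, not part of the patch): the effect of the weighting can be checked with a small standalone sketch. Taking the imm_zextw2 case from rv64zba.ll above, the old lui/addiw/slli/addi sequence scores 100+100+70+100 = 370, because only the shift is compressible (the lui/addiw/addi immediates do not fit in 6 bits), while the new lui/addiw/slli/srli sequence scores 100+100+70+70 = 340, since shift-immediates always have a compressed form. Both sequences are four instructions, so the old size()-based comparison could not tell them apart. The Opc enum, Inst struct, and seqCost helper below are hypothetical simplified stand-ins for the RISCVMatInt types, not the LLVM API.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-ins for the RISCVMatInt instruction sequence.
    enum Opc { LUI, ADDI, ADDIW, SLLI, SRLI };
    struct Inst {
      Opc Opcode;
      int64_t Imm;
    };

    // Mirrors the patch's weighting: RVI = 100, RVC = 70 (60 with OptSize).
    static int seqCost(const std::vector<Inst> &Seq, bool OptSize = false) {
      int Cost = 0;
      for (const Inst &I : Seq) {
        bool Compressed = false;
        switch (I.Opcode) {
        case SLLI:
        case SRLI:
          Compressed = true; // Shift-immediates always have an RVC form.
          break;
        case ADDI:
        case ADDIW:
        case LUI:
          Compressed = I.Imm >= -32 && I.Imm <= 31; // isInt<6>(Imm)
          break;
        }
        Cost += !Compressed ? 100 : (OptSize ? 60 : 70);
      }
      return Cost;
    }

    int main() {
      // Old imm_zextw2 sequence: lui 171; addiw -1365; slli 12; addi -1366.
      std::vector<Inst> Old = {{LUI, 171}, {ADDIW, -1365}, {SLLI, 12}, {ADDI, -1366}};
      // New sequence: lui 873813; addiw 1365; slli 33; srli 32.
      std::vector<Inst> New = {{LUI, 873813}, {ADDIW, 1365}, {SLLI, 33}, {SRLI, 32}};
      std::printf("old cost = %d, new cost = %d\n", seqCost(Old), seqCost(New));
      // Prints: old cost = 370, new cost = 340.
      return 0;
    }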