diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
--- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -253,8 +253,8 @@
     /// before the trace are not included.
     unsigned Depth;
 
-    /// Minimum number of cycles from this instruction is issued to the of the
-    /// trace, as determined by data dependencies and instruction latencies.
+    /// Minimum number of cycles from this instruction is issued to the end of
+    /// the trace, as determined by data dependencies and instruction latencies.
     unsigned Height;
   };
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -93,10 +93,11 @@
   bool combineInstructions(MachineBasicBlock *);
   MachineInstr *getOperandDef(const MachineOperand &MO);
   bool isTransientMI(const MachineInstr *MI);
-  unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
-                    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                    MachineTraceMetrics::Trace BlockTrace,
-                    const MachineBasicBlock &MBB);
+  std::pair<unsigned, unsigned>
+  getInstrCycles(SmallVectorImpl<MachineInstr *> &InsInstrs,
+                 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+                 MachineTraceMetrics::Trace BlockTrace,
+                 const MachineBasicBlock &MBB);
   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                       MachineTraceMetrics::Trace BlockTrace);
   bool
@@ -149,16 +150,11 @@
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-MachineInstr *
-MachineCombiner::getOperandDef(const MachineOperand &MO) {
-  MachineInstr *DefInstr = nullptr;
+MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
   // We need a virtual register definition.
-  if (MO.isReg() && MO.getReg().isVirtual())
-    DefInstr = MRI->getUniqueVRegDef(MO.getReg());
-  // PHI's have no depth etc.
-  if (DefInstr && DefInstr->isPHI())
-    DefInstr = nullptr;
-  return DefInstr;
+  if (!(MO.isReg() && MO.getReg().isVirtual()))
+    return nullptr;
+  return MRI->getUniqueVRegDef(MO.getReg());
 }
 
 /// Return true if MI is unlikely to generate an actual target instruction.
@@ -198,7 +194,7 @@
   return DstRC->contains(Src);
 }
 
-/// Computes depth of instructions in vector \InsInstr.
+/// Computes depth and height of the last instruction in \InsInstrs.
 ///
 /// \param InsInstrs is a vector of machine instructions
 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -206,23 +202,20 @@
 /// \param BlockTrace is a trace of machine instructions
 ///
 /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
-unsigned
-MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
-                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                          MachineTraceMetrics::Trace BlockTrace,
-                          const MachineBasicBlock &MBB) {
+std::pair<unsigned, unsigned> MachineCombiner::getInstrCycles(
+    SmallVectorImpl<MachineInstr *> &InsInstrs,
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+    MachineTraceMetrics::Trace BlockTrace, const MachineBasicBlock &MBB) {
   SmallVector<unsigned, 16> InstrDepth;
   // For each instruction in the new sequence compute the depth based on the
   // operands. Use the trace information when possible. For new operands which
   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
   for (auto *InstrPtr : InsInstrs) { // for each Use
     unsigned IDepth = 0;
-    for (const MachineOperand &MO : InstrPtr->operands()) {
+    for (const MachineOperand &MO : InstrPtr->uses()) {
       // Check for virtual register operand.
       if (!(MO.isReg() && MO.getReg().isVirtual()))
         continue;
-      if (!MO.isUse())
-        continue;
       unsigned DepthOp = 0;
       unsigned LatencyOp = 0;
       DenseMap<unsigned, unsigned>::iterator II =
@@ -244,18 +237,43 @@
                              MachineTraceStrategy::TS_Local ||
                          DefInstr->getParent() == &MBB)) {
           DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
-          if (!isTransientMI(DefInstr))
-            LatencyOp = TSchedModel.computeOperandLatency(
-                DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
-                InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+          if (!isTransientMI(DefInstr)) {
+            int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+            int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+            LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
+                                                          InstrPtr, UseIdx);
+          }
         }
       }
       IDepth = std::max(IDepth, DepthOp + LatencyOp);
     }
     InstrDepth.push_back(IDepth);
   }
+  // Compute the height of the last instruction based on the operands, using
+  // trace information.
   unsigned NewRootIdx = InsInstrs.size() - 1;
-  return InstrDepth[NewRootIdx];
+  auto *DefInstr = InsInstrs[NewRootIdx];
+  unsigned IHeight = 0;
+  for (const MachineOperand &MO : DefInstr->defs()) {
+    // Check for virtual register operand.
+    if (!(MO.isReg() && MO.getReg().isVirtual()))
+      continue;
+    unsigned HeightOp = 0;
+    unsigned LatencyOp = 0;
+
+    // Get the first instruction that uses MO
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(MO.getReg());
+    if (UI == MRI->use_end())
+      continue;
+    MachineInstr *UseInstr = UI->getParent();
+    HeightOp = BlockTrace.getInstrCycles(*UseInstr).Height;
+    int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+    int UseIdx = UseInstr->findRegisterUseOperandIdx(MO.getReg());
+    LatencyOp =
+        TSchedModel.computeOperandLatency(DefInstr, DefIdx, UseInstr, UseIdx);
+    IHeight = std::max(IHeight, HeightOp + LatencyOp);
+  }
+  return {InstrDepth[NewRootIdx], IHeight};
 }
 
 /// Computes instruction latency as max of latency of defined operands.
@@ -376,60 +394,41 @@
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     SmallVectorImpl<MachineInstr *> &DelInstrs,
     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-    MachineCombinerPattern Pattern,
-    bool SlackIsAccurate) {
-  // Get depth and latency of NewRoot and Root.
-  unsigned NewRootDepth =
-      getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
-  unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
+    MachineCombinerPattern Pattern, bool SlackIsAccurate) {
+  LLVM_DEBUG(dbgs() << " Dependence data for " << *Root);
-  LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
-                    << NewRootDepth << "\tRootDepth: " << RootDepth);
-
-  // For a transform such as reassociation, the cost equation is
-  // conservatively calculated so that we must improve the depth (data
-  // dependency cycles) in the critical path to proceed with the transform.
-  // Being conservative also protects against inaccuracies in the underlying
-  // machine trace metrics and CPU models.
-  if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
-    LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
-    LLVM_DEBUG(NewRootDepth < RootDepth
-                   ? dbgs() << "\t and it does it\n"
-                   : dbgs() << "\t but it does NOT do it\n");
-    return NewRootDepth < RootDepth;
-  }
-
-  // A more flexible cost calculation for the critical path includes the slack
-  // of the original code sequence. This may allow the transform to proceed
-  // even if the instruction depths (data dependency cycles) become worse.
-
-  // Account for the latency of the inserted and deleted instructions by
-  unsigned NewRootLatency, RootLatency;
-  if (TII->accumulateInstrSeqToRootLatency(*Root)) {
-    std::tie(NewRootLatency, RootLatency) =
-        getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
-  } else {
-    NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
-    RootLatency = TSchedModel.computeInstrLatency(Root);
-  }
+  // Compute depths and heights of Root and NewRoot from the BlockTrace.
+  unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
+  unsigned RootHeight = BlockTrace.getInstrCycles(*Root).Height;
+  unsigned NewRootDepth, NewRootHeight;
+  std::tie(NewRootDepth, NewRootHeight) =
+      getInstrCycles(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
+
+  // For a transform such as reassociation, the cost equation is conservatively
+  // calculated so that we must improve the critical path length to proceed
+  // with the transform. Being conservative also protects against inaccuracies
+  // in the underlying machine trace metrics and CPU models.
+  bool MustReduceDepth =
+      (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth);
   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
-  unsigned OldCycleCount =
-      RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
-  LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
-                    << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
-                    << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
-                    << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
-                    << "\n\tRootDepth + RootLatency + RootSlack = "
-                    << OldCycleCount;);
-  LLVM_DEBUG(NewCycleCount <= OldCycleCount
-                 ? dbgs() << "\n\t It IMPROVES PathLen because"
-                 : dbgs() << "\n\t It DOES NOT improve PathLen because");
-  LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
-                    << ", OldCycleCount = " << OldCycleCount << "\n");
-
-  return NewCycleCount <= OldCycleCount;
+  unsigned CriticalPath = RootDepth + RootHeight +
+                          (SlackIsAccurate && !MustReduceDepth ? RootSlack : 0);
+  unsigned NewCriticalPath = NewRootDepth + NewRootHeight;
+  bool ReducesCriticalPathLen = MustReduceDepth
+                                    ? NewCriticalPath < CriticalPath
+                                    : NewCriticalPath <= CriticalPath;
+  LLVM_DEBUG(dbgs() << "\tNewRootDepth: " << NewRootDepth << ", OldRootDepth: "
+                    << RootDepth << "\n\tNewRootHeight: " << NewRootHeight
+                    << ", OldRootHeight: " << RootHeight
+                    << "\tRootSlack: " << RootSlack);
+  LLVM_DEBUG(
+      ReducesCriticalPathLen
+          ?
dbgs() << "\n\t It IMPROVES Critical PathLen because" + : dbgs() << "\n\t It DOES NOT improve Critical PathLen because"); + LLVM_DEBUG(dbgs() << "\n\t\tNewCriticalPath = " << NewCriticalPath + << ", OldCriticalPath = " << CriticalPath << "\n"); + return ReducesCriticalPathLen; } /// helper routine to convert instructions into SC diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll --- a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -21,8 +21,8 @@ ; RISCV32-NEXT: mulhu t0, a1, a2 ; RISCV32-NEXT: add a6, a6, t0 ; RISCV32-NEXT: add a5, a6, a5 -; RISCV32-NEXT: add a5, a5, a7 ; RISCV32-NEXT: mul a6, a1, a3 +; RISCV32-NEXT: add a6, a7, a6 ; RISCV32-NEXT: add a5, a5, a6 ; RISCV32-NEXT: bgez a1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -246,7 +246,7 @@ ; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX8RV64-NEXT: slli a3, a3, 30 -; LMULMAX8RV64-NEXT: or a1, a1, a3 +; LMULMAX8RV64-NEXT: or a2, a2, a3 ; LMULMAX8RV64-NEXT: or a1, a1, a2 ; LMULMAX8RV64-NEXT: sw a1, 0(a0) ; LMULMAX8RV64-NEXT: slli a1, a1, 19 @@ -292,7 +292,7 @@ ; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX1RV64-NEXT: slli a3, a3, 30 -; LMULMAX1RV64-NEXT: or a1, a1, a3 +; LMULMAX1RV64-NEXT: or a2, a2, a3 ; LMULMAX1RV64-NEXT: or a1, a1, a2 ; LMULMAX1RV64-NEXT: sw a1, 0(a0) ; LMULMAX1RV64-NEXT: slli a1, a1, 19 @@ -343,7 +343,7 @@ ; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX8RV64-NEXT: slli a3, a3, 30 -; LMULMAX8RV64-NEXT: or a1, a1, a3 +; LMULMAX8RV64-NEXT: or a2, a2, a3 ; LMULMAX8RV64-NEXT: or a1, a1, a2 ; LMULMAX8RV64-NEXT: sw a1, 0(a0) ; LMULMAX8RV64-NEXT: slli a1, a1, 19 @@ -389,7 +389,7 @@ ; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX1RV64-NEXT: slli a3, a3, 30 -; LMULMAX1RV64-NEXT: or a1, a1, a3 +; LMULMAX1RV64-NEXT: or a2, a2, a3 ; LMULMAX1RV64-NEXT: or a1, a1, a2 ; LMULMAX1RV64-NEXT: sw a1, 0(a0) ; LMULMAX1RV64-NEXT: slli a1, a1, 19 diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll @@ -59,10 +59,10 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: mv a4, a1 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: add a5, a4, a3 ; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a1, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a4, a1 ; RV32I-NEXT: xor a2, a4, a1 ; RV32I-NEXT: xor a3, a4, a3 ; RV32I-NEXT: not a3, a3 @@ -94,10 +94,10 @@ ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: mv a4, a1 ; RV32IZbb-NEXT: mv a1, a0 -; RV32IZbb-NEXT: add a5, a4, a3 ; RV32IZbb-NEXT: add a0, a0, a2 ; RV32IZbb-NEXT: sltu a1, a0, a1 -; RV32IZbb-NEXT: add a1, a5, a1 +; RV32IZbb-NEXT: add a1, a3, a1 +; RV32IZbb-NEXT: add a1, a4, a1 ; RV32IZbb-NEXT: xor a2, a4, a1 ; RV32IZbb-NEXT: xor a3, a4, a3 ; RV32IZbb-NEXT: andn a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -65,10 +65,10 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: add a3, a2, a5 ; RV32I-NEXT: add a0, a0, a4 ; RV32I-NEXT: sltu 
a1, a0, a1 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a5, a1 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: xor a3, a2, a1 ; RV32I-NEXT: xor a2, a2, a5 ; RV32I-NEXT: not a2, a2 @@ -100,10 +100,10 @@ ; RV32IZbb: # %bb.0: ; RV32IZbb-NEXT: mv a2, a1 ; RV32IZbb-NEXT: mv a1, a0 -; RV32IZbb-NEXT: add a3, a2, a5 ; RV32IZbb-NEXT: add a0, a0, a4 ; RV32IZbb-NEXT: sltu a1, a0, a1 -; RV32IZbb-NEXT: add a1, a3, a1 +; RV32IZbb-NEXT: add a1, a5, a1 +; RV32IZbb-NEXT: add a1, a2, a1 ; RV32IZbb-NEXT: xor a3, a2, a1 ; RV32IZbb-NEXT: xor a2, a2, a5 ; RV32IZbb-NEXT: andn a2, a3, a2 diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -59,8 +59,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: sltu a1, a0, a2 -; RV32-NEXT: sub a5, a4, a3 -; RV32-NEXT: sub a1, a5, a1 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: sub a1, a4, a1 ; RV32-NEXT: xor a5, a4, a1 ; RV32-NEXT: xor a3, a4, a3 ; RV32-NEXT: and a3, a3, a5 diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -65,8 +65,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: sltu a1, a0, a4 -; RV32-NEXT: sub a3, a2, a5 -; RV32-NEXT: sub a1, a3, a1 +; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: sub a1, a2, a1 ; RV32-NEXT: xor a3, a2, a1 ; RV32-NEXT: xor a2, a2, a5 ; RV32-NEXT: and a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -37,11 +37,11 @@ ; RISCV32-NEXT: add s4, s3, t1 ; RISCV32-NEXT: add t1, s0, s4 ; RISCV32-NEXT: sltu t2, t1, s0 -; RISCV32-NEXT: sltu s0, s0, t6 +; RISCV32-NEXT: sltu t6, s0, t6 ; RISCV32-NEXT: sltu t4, t5, t4 -; RISCV32-NEXT: mulhu t5, t3, a2 -; RISCV32-NEXT: add t4, t5, t4 -; RISCV32-NEXT: add s0, t4, s0 +; RISCV32-NEXT: mulhu s0, t3, a2 +; RISCV32-NEXT: add t4, t4, t6 +; RISCV32-NEXT: add s0, s0, t4 ; RISCV32-NEXT: mul t4, t3, t0 ; RISCV32-NEXT: mul t5, a7, a5 ; RISCV32-NEXT: add t4, t5, t4 @@ -55,8 +55,8 @@ ; RISCV32-NEXT: add t4, t6, s2 ; RISCV32-NEXT: sltu s3, s4, s3 ; RISCV32-NEXT: add t4, t4, s3 -; RISCV32-NEXT: add t4, s0, t4 ; RISCV32-NEXT: add t4, t4, t2 +; RISCV32-NEXT: add t4, s0, t4 ; RISCV32-NEXT: beq t4, s0, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start ; RISCV32-NEXT: sltu t2, t4, s0 @@ -89,8 +89,8 @@ ; RISCV32-NEXT: snez a3, a3 ; RISCV32-NEXT: and a3, a3, a7 ; RISCV32-NEXT: or a2, a3, a2 -; RISCV32-NEXT: or a3, t3, t2 -; RISCV32-NEXT: or a2, a2, a3 +; RISCV32-NEXT: or a2, a2, t3 +; RISCV32-NEXT: or a2, a2, t2 ; RISCV32-NEXT: mul a3, a5, a4 ; RISCV32-NEXT: andi a2, a2, 1 ; RISCV32-NEXT: sw a3, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -634,7 +634,7 @@ ; RV64MV-NEXT: vslidedown.vi v8, v8, 2 ; RV64MV-NEXT: vmv.x.s a3, v8 ; RV64MV-NEXT: slli a3, a3, 22 -; RV64MV-NEXT: or a1, a1, a3 +; RV64MV-NEXT: or a2, a2, a3 ; RV64MV-NEXT: or a1, a1, a2 ; RV64MV-NEXT: sw a1, 0(a0) ; RV64MV-NEXT: slli a1, a1, 31 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll 
b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -271,8 +271,8 @@ ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -407,8 +407,8 @@ ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -538,46 +538,46 @@ ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) ; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a4, a6, 24 -; RV32I-NEXT: or a5, a4, a5 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a5, a5, 24 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: slli a5, a1, 3 -; RV32I-NEXT: addi a6, a5, -32 -; RV32I-NEXT: sra a1, a3, a5 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a1, 3 +; RV32I-NEXT: addi a6, a4, -32 +; RV32I-NEXT: sra a1, a3, a4 ; RV32I-NEXT: bltz a6, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a4, a4, 31 +; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 1(a0) ; RV32I-NEXT: lbu a6, 0(a0) ; RV32I-NEXT: lbu a7, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: srl a0, a0, a5 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: srl a0, a0, a4 ; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: not a4, a5 +; RV32I-NEXT: not a4, a4 ; RV32I-NEXT: sll a3, a3, a4 ; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: .LBB5_3: @@ -616,19 +616,19 @@ ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: 
slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) ; RV64I-NEXT: lbu a6, 6(a1) @@ -835,19 +835,19 @@ ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: lbu a4, 5(a1) ; RV64I-NEXT: lbu a5, 4(a1) ; RV64I-NEXT: lbu a6, 6(a1) @@ -1053,20 +1053,20 @@ ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a4, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a5, a4, 32 -; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: slli a6, a4, 32 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: lbu a5, 5(a1) ; RV64I-NEXT: lbu a6, 4(a1) ; RV64I-NEXT: lbu a7, 6(a1) diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -261,8 +261,8 @@ ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -271,8 +271,8 @@ ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a5, a1, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, a6, a4 +; RV32I-NEXT: or a5, a1, a5 ; RV32I-NEXT: addi a4, a5, -32 ; RV32I-NEXT: srl a1, a3, a5 ; RV32I-NEXT: bltz a4, .LBB3_2 @@ -394,8 +394,8 @@ ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 2(a1) @@ -404,8 +404,8 @@ ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a5, a1, a6 -; RV32I-NEXT: or a5, a5, a4 +; RV32I-NEXT: or a5, 
a6, a4 +; RV32I-NEXT: or a5, a1, a5 ; RV32I-NEXT: addi a4, a5, -32 ; RV32I-NEXT: sll a1, a3, a5 ; RV32I-NEXT: bltz a4, .LBB4_2 @@ -522,44 +522,44 @@ ; RV32I-NEXT: lbu a3, 5(a0) ; RV32I-NEXT: lbu a4, 4(a0) ; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a4, a6, 24 -; RV32I-NEXT: or a5, a4, a5 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a4, 1(a1) ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lbu a5, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli a5, a5, 24 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a5, a1, a5 -; RV32I-NEXT: addi a6, a5, -32 -; RV32I-NEXT: sra a1, a3, a5 +; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: or a4, a1, a4 +; RV32I-NEXT: addi a6, a4, -32 +; RV32I-NEXT: sra a1, a3, a4 ; RV32I-NEXT: bltz a6, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a4, a4, 31 +; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 1(a0) ; RV32I-NEXT: lbu a6, 0(a0) ; RV32I-NEXT: lbu a7, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: srl a0, a0, a5 -; RV32I-NEXT: not a4, a5 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: not a4, a4 ; RV32I-NEXT: slli a3, a3, 1 ; RV32I-NEXT: sll a3, a3, a4 ; RV32I-NEXT: or a0, a0, a3 @@ -598,19 +598,19 @@ ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) ; RV64I-NEXT: lbu a6, 2(a1) @@ -620,19 +620,19 @@ ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 ; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 5(a1) -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: 
or a1, a1, a7 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a5, a1, a4 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a5, a1, a5 ; RV64I-NEXT: addi a4, a5, -64 ; RV64I-NEXT: srl a1, a3, a5 ; RV64I-NEXT: bltz a4, .LBB6_2 @@ -881,19 +881,19 @@ ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu a5, 4(a0) -; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu a7, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a5, 5(a0) +; RV64I-NEXT: lbu a6, 4(a0) +; RV64I-NEXT: lbu a7, 6(a0) +; RV64I-NEXT: lbu t0, 7(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: lbu a4, 1(a1) ; RV64I-NEXT: lbu a5, 0(a1) ; RV64I-NEXT: lbu a6, 2(a1) @@ -903,19 +903,19 @@ ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 ; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: lbu a5, 5(a1) -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu a7, 6(a1) +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, a7 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a5, a1, a4 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: or a5, a1, a5 ; RV64I-NEXT: addi a4, a5, -64 ; RV64I-NEXT: sll a1, a3, a5 ; RV64I-NEXT: bltz a4, .LBB7_2 @@ -1163,20 +1163,20 @@ ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: lbu a4, 13(a0) -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a7, a6 -; RV64I-NEXT: or a4, a5, a4 -; RV64I-NEXT: slli a5, a4, 32 -; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: slli a6, a4, 32 +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: lbu a5, 1(a1) ; RV64I-NEXT: lbu a6, 0(a1) ; RV64I-NEXT: lbu a7, 2(a1) @@ -1186,19 +1186,19 @@ ; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli t0, t0, 24 ; RV64I-NEXT: or a6, t0, a7 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 5(a1) -; RV64I-NEXT: lbu a7, 4(a1) -; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a6, a6, a7 -; RV64I-NEXT: slli t0, t0, 16 +; 
RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a1, a1, 24 -; RV64I-NEXT: or a1, a1, t0 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, a7 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a5, a1, a5 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: or a5, a1, a6 ; RV64I-NEXT: addi a6, a5, -64 ; RV64I-NEXT: sra a1, a3, a5 ; RV64I-NEXT: bltz a6, .LBB8_2 diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -177,10 +177,10 @@ define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, ptr %res) { ; RV32-LABEL: saddo1.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: add a5, a1, a3 ; RV32-NEXT: add a2, a0, a2 ; RV32-NEXT: sltu a0, a2, a0 -; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a5, a1, a0 ; RV32-NEXT: xor a0, a1, a5 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: not a1, a1 @@ -201,10 +201,10 @@ ; ; RV32ZBA-LABEL: saddo1.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: add a5, a1, a3 ; RV32ZBA-NEXT: add a2, a0, a2 ; RV32ZBA-NEXT: sltu a0, a2, a0 -; RV32ZBA-NEXT: add a5, a5, a0 +; RV32ZBA-NEXT: add a0, a3, a0 +; RV32ZBA-NEXT: add a5, a1, a0 ; RV32ZBA-NEXT: xor a0, a1, a5 ; RV32ZBA-NEXT: xor a1, a1, a3 ; RV32ZBA-NEXT: not a1, a1 @@ -614,8 +614,8 @@ ; RV32-LABEL: ssubo.i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: sltu a5, a0, a2 -; RV32-NEXT: sub a6, a1, a3 -; RV32-NEXT: sub a5, a6, a5 +; RV32-NEXT: add a5, a3, a5 +; RV32-NEXT: sub a5, a1, a5 ; RV32-NEXT: xor a6, a1, a5 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: and a1, a1, a6 @@ -638,8 +638,8 @@ ; RV32ZBA-LABEL: ssubo.i64: ; RV32ZBA: # %bb.0: # %entry ; RV32ZBA-NEXT: sltu a5, a0, a2 -; RV32ZBA-NEXT: sub a6, a1, a3 -; RV32ZBA-NEXT: sub a5, a6, a5 +; RV32ZBA-NEXT: add a5, a3, a5 +; RV32ZBA-NEXT: sub a5, a1, a5 ; RV32ZBA-NEXT: xor a6, a1, a5 ; RV32ZBA-NEXT: xor a1, a1, a3 ; RV32ZBA-NEXT: and a1, a1, a6
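
Note on the cost model (not part of the patch itself): improvesCriticalPathLen() now measures the critical path through Root as Depth + Height from the block trace, adds the slack only when it is accurate and the pattern does not demand a strict depth reduction, and accepts the new sequence when NewRoot's Depth + Height does not exceed that value (strictly smaller for MustReduceDepth patterns). The following is a minimal, self-contained C++ sketch of that decision rule only; the cycle counts are invented for illustration, the variable names merely mirror the patch, and no LLVM APIs or real CPU-model numbers are used.

// critical_path_sketch.cpp -- illustrative only; invented cycle counts.
#include <cstdio>

int main() {
  // Hypothetical trace metrics for the original root instruction.
  unsigned RootDepth = 10, RootHeight = 4, RootSlack = 2;
  // Hypothetical metrics for the last inserted instruction ("NewRoot").
  unsigned NewRootDepth = 8, NewRootHeight = 5;

  bool SlackIsAccurate = true;
  bool MustReduceDepth = false; // e.g. the pattern is not a reassociation

  // Old critical path through Root: depth + height, plus slack when it is
  // accurate and the pattern does not insist on a strict depth reduction.
  unsigned CriticalPath =
      RootDepth + RootHeight +
      (SlackIsAccurate && !MustReduceDepth ? RootSlack : 0); // 10 + 4 + 2 = 16
  // Critical path through the replacement sequence.
  unsigned NewCriticalPath = NewRootDepth + NewRootHeight;   // 8 + 5 = 13

  // MustReduceDepth patterns must strictly shorten the path; others may tie.
  bool ReducesCriticalPathLen = MustReduceDepth
                                    ? NewCriticalPath < CriticalPath
                                    : NewCriticalPath <= CriticalPath;
  std::printf("old = %u, new = %u, apply combine = %s\n", CriticalPath,
              NewCriticalPath, ReducesCriticalPathLen ? "yes" : "no");
  // Prints: old = 16, new = 13, apply combine = yes
  return 0;
}

Because the height of the new root now enters the comparison, a candidate that shortens the dependence chain below the root can win even when its depth alone would not, which appears to be what drives the re-associated add/sub sequences in the RISC-V test updates above.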