diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -218,6 +218,10 @@ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, SmallVectorImpl<MachineInstr *> &InsInstrs) const override; + bool shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const override; + void genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCInstBuilder.h" @@ -34,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "riscv-instr-info" + #define GEN_CHECK_COMPRESS_INSTR #include "RISCVGenCompressInstEmitter.inc" @@ -1368,6 +1371,49 @@ } } +static std::vector<unsigned> +getMBBPressure(const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo, + const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) { + RegionPressure Pressure; + RegPressureTracker RPTracker(Pressure); + + // Initialize the register pressure tracker. 
+ RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), + /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); + + for (const auto &MI : reverse(*MBB)) { + if (MI.isDebugOrPseudoInstr()) + continue; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, false, false); + RPTracker.recedeSkipDebugValues(); + assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); + RPTracker.recede(RegOpers); + } + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + return RPTracker.getPressure().MaxSetPressure; +} + +bool RISCVInstrInfo::shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const { + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MachineFunction *MF = MBB->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + + unsigned GPRLimit = TRI->getRegPressureSetLimit( + *MBB->getParent(), RISCV::RegisterPressureSets::GPR); + unsigned Pressure = getMBBPressure(MBB, RegClassInfo, TRI, + MRI)[RISCV::RegisterPressureSets::GPR]; + + LLVM_DEBUG(dbgs() << "Register Pressure: " << Pressure << "::" << GPRLimit + << "\n"); + return Pressure > GPRLimit; +} + static bool isFADD(unsigned Opc) { switch (Opc) { default: @@ -1527,42 +1573,28 @@ return RISCV::hasEqualFRM(Root, *MI); } -static bool -getFPFusedMultiplyPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) { - unsigned Opc = Root.getOpcode(); - bool IsFAdd = isFADD(Opc); - if (!IsFAdd && !isFSUB(Opc)) - return false; - bool Added = false; - if (canCombineFPFusedMultiply(Root, Root.getOperand(1), - DoRegPressureReduce)) { - Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX - : MachineCombinerPattern::FMSUB); - Added = true; - } - if (canCombineFPFusedMultiply(Root, Root.getOperand(2), - DoRegPressureReduce)) { - Patterns.push_back(IsFAdd ? 
MachineCombinerPattern::FMADD_XA - : MachineCombinerPattern::FNMSUB); - Added = true; - } - return Added; -} - -static bool getFPPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) { - return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); -} - bool RISCVInstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, bool DoRegPressureReduce) const { + unsigned Opc = Root.getOpcode(); + bool IsFAdd = isFADD(Opc); - if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) - return true; + if (IsFAdd || isFSUB(Opc)) { + if (canCombineFPFusedMultiply(Root, Root.getOperand(1), + DoRegPressureReduce)) { + Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX + : MachineCombinerPattern::FMSUB); + return true; + } else if (canCombineFPFusedMultiply(Root, Root.getOperand(2), + DoRegPressureReduce)) { + Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA + : MachineCombinerPattern::FNMSUB); + return true; + } + } + + if (DoRegPressureReduce) + return false; return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, DoRegPressureReduce); diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll --- a/llvm/test/CodeGen/RISCV/machine-combiner.ll +++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll @@ -1139,21 +1139,21 @@ define void @test_reg_pressure(ptr %state, ptr %input, ptr %k) { ; CHECK-LABEL: test_reg_pressure: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -272 -; CHECK-NEXT: .cfi_def_cfa_offset 272 -; CHECK-NEXT: sd ra, 264(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 256(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 248(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 240(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 232(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 224(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s5, 216(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s6, 208(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd 
s7, 200(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 192(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 184(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s10, 176(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s11, 168(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -288 +; CHECK-NEXT: .cfi_def_cfa_offset 288 +; CHECK-NEXT: sd ra, 280(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 272(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 264(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 256(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 232(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 224(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 216(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 208(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s9, 200(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s10, 192(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s11, 184(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -1167,642 +1167,639 @@ ; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: .cfi_offset s10, -96 ; CHECK-NEXT: .cfi_offset s11, -104 -; CHECK-NEXT: lw t4, 132(sp) -; CHECK-NEXT: lw s8, 164(sp) -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: add t5, t4, t4 +; CHECK-NEXT: lw a7, 148(sp) +; CHECK-NEXT: lw s8, 180(sp) +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: add t0, a7, a7 ; CHECK-NEXT: addi s10, a2, 124 -; CHECK-NEXT: sd s8, 88(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: mv t3, s8 -; CHECK-NEXT: mv s4, s8 +; CHECK-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: mv s5, s8 +; CHECK-NEXT: mv t3, s8 +; CHECK-NEXT: sd s8, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv s6, s8 +; CHECK-NEXT: mv t2, s8 ; CHECK-NEXT: mv s7, s8 -; CHECK-NEXT: mv t1, s8 -; 
CHECK-NEXT: sd s8, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv a3, s8 ; CHECK-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: mv a7, s8 +; CHECK-NEXT: mv a6, s8 ; CHECK-NEXT: sd s8, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 96(sp) # 8-byte Folded Spill -; CHECK-NEXT: mv s3, t5 -; CHECK-NEXT: mv s2, t5 -; CHECK-NEXT: mv t6, t5 -; CHECK-NEXT: mv ra, t5 +; CHECK-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv t5, t0 +; CHECK-NEXT: mv s2, t0 +; CHECK-NEXT: mv t6, t0 +; CHECK-NEXT: mv ra, t0 ; CHECK-NEXT: .LBB77_1: # %do.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd a7, 80(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd a3, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: roriw s11, s3, 11 -; CHECK-NEXT: roriw s9, s3, 25 -; CHECK-NEXT: roriw a2, s3, 6 +; CHECK-NEXT: sd t3, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a6, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw s11, t5, 11 +; CHECK-NEXT: roriw s9, t5, 25 +; CHECK-NEXT: roriw a0, t5, 6 ; CHECK-NEXT: add s9, s9, s11 -; CHECK-NEXT: add s11, s11, a2 -; CHECK-NEXT: add a5, t4, a2 -; CHECK-NEXT: add a5, a5, s9 -; CHECK-NEXT: add a2, a2, s11 -; CHECK-NEXT: roriw s0, s11, 2 +; CHECK-NEXT: add s11, s11, a0 +; CHECK-NEXT: add a5, s9, a7 +; CHECK-NEXT: add a5, a5, a0 +; CHECK-NEXT: roriw s1, s11, 2 ; CHECK-NEXT: roriw a4, s11, 13 -; CHECK-NEXT: add a2, a2, a5 -; CHECK-NEXT: roriw a5, s11, 22 -; CHECK-NEXT: xor a6, s11, ra -; CHECK-NEXT: xor a4, a4, s0 -; CHECK-NEXT: roriw s0, s7, 7 -; CHECK-NEXT: and a7, s11, ra -; CHECK-NEXT: roriw t4, s7, 18 -; CHECK-NEXT: and a3, a6, t6 -; CHECK-NEXT: roriw s1, s9, 17 -; CHECK-NEXT: xor a4, a4, a5 +; CHECK-NEXT: roriw a6, s9, 17 +; CHECK-NEXT: add a0, a0, a5 ; CHECK-NEXT: roriw a5, s9, 19 -; CHECK-NEXT: xor s0, s0, t4 -; CHECK-NEXT: srliw a1, s7, 3 -; CHECK-NEXT: xor a3, a3, a7 -; CHECK-NEXT: add a6, a2, s2 -; CHECK-NEXT: xor a5, a5, 
s1 -; CHECK-NEXT: srliw s1, s9, 10 -; CHECK-NEXT: xor a1, a1, s0 -; CHECK-NEXT: ld s0, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: mv s6, t1 -; CHECK-NEXT: add s0, s0, t1 -; CHECK-NEXT: add a3, a3, a4 -; CHECK-NEXT: roriw a4, a6, 6 -; CHECK-NEXT: lw a0, -52(s10) -; CHECK-NEXT: xor s1, s1, a5 -; CHECK-NEXT: add a1, a1, s0 -; CHECK-NEXT: add t4, a3, a2 -; CHECK-NEXT: roriw a2, a6, 11 -; CHECK-NEXT: andn a3, s3, a6 -; CHECK-NEXT: and s0, s9, a6 -; CHECK-NEXT: mv t0, s9 -; CHECK-NEXT: mv t1, t3 -; CHECK-NEXT: sd t3, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: add t3, s1, a1 -; CHECK-NEXT: add a0, a0, t5 -; CHECK-NEXT: xor a2, a2, a4 -; CHECK-NEXT: roriw a1, a6, 25 -; CHECK-NEXT: or a3, a3, s0 -; CHECK-NEXT: roriw a7, t4, 2 -; CHECK-NEXT: add a0, a0, t3 -; CHECK-NEXT: roriw s1, t4, 13 -; CHECK-NEXT: roriw s0, s4, 7 +; CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: xor s1, s11, ra +; CHECK-NEXT: roriw a7, s11, 22 +; CHECK-NEXT: and t4, s11, ra +; CHECK-NEXT: xor a5, a6, a5 +; CHECK-NEXT: srliw s0, s9, 10 +; CHECK-NEXT: and s1, s1, t6 +; CHECK-NEXT: roriw a1, t2, 7 +; CHECK-NEXT: add a0, a0, s11 +; CHECK-NEXT: roriw a2, t2, 18 +; CHECK-NEXT: xor a5, a5, s0 +; CHECK-NEXT: xor a4, a4, a7 +; CHECK-NEXT: xor s0, s1, t4 +; CHECK-NEXT: add t1, a0, s2 ; CHECK-NEXT: xor a1, a1, a2 -; CHECK-NEXT: roriw a2, s4, 18 -; CHECK-NEXT: add a0, a0, a3 -; CHECK-NEXT: roriw a3, s11, 17 -; CHECK-NEXT: roriw a4, s11, 19 -; CHECK-NEXT: xor a2, a2, s0 -; CHECK-NEXT: srliw s0, s4, 3 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: xor s1, a7, s1 -; CHECK-NEXT: roriw a5, t4, 22 -; CHECK-NEXT: xor a3, a3, a4 -; CHECK-NEXT: xor a4, t4, s11 -; CHECK-NEXT: xor a2, a2, s0 +; CHECK-NEXT: srliw a2, t2, 3 +; CHECK-NEXT: add a5, a5, a3 +; CHECK-NEXT: add a4, a4, s0 +; CHECK-NEXT: roriw a3, t1, 6 +; CHECK-NEXT: lw s0, -52(s10) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sd s7, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: add a2, a5, s7 +; CHECK-NEXT: add a6, a4, a0 +; CHECK-NEXT: roriw a0, t1, 11 +; CHECK-NEXT: 
andn a4, t5, t1 +; CHECK-NEXT: and a5, s9, t1 +; CHECK-NEXT: add s1, a2, a1 +; CHECK-NEXT: add s0, s0, t0 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: roriw a1, t1, 25 +; CHECK-NEXT: or a4, a4, a5 +; CHECK-NEXT: roriw a2, a6, 2 +; CHECK-NEXT: add s0, s0, s1 +; CHECK-NEXT: roriw a3, a6, 13 +; CHECK-NEXT: xor a7, a0, a1 +; CHECK-NEXT: xor a1, a6, s11 +; CHECK-NEXT: add a4, a4, s0 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: roriw a3, a6, 22 +; CHECK-NEXT: and a5, a6, s11 +; CHECK-NEXT: and a1, a1, ra +; CHECK-NEXT: roriw s0, s11, 17 +; CHECK-NEXT: roriw a0, s11, 19 +; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: xor a3, a3, a2 +; CHECK-NEXT: xor a1, a1, a5 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a5, s4, 7 ; CHECK-NEXT: srliw s0, s11, 10 -; CHECK-NEXT: add s7, s7, s5 -; CHECK-NEXT: add t6, t6, a0 -; CHECK-NEXT: xor a5, a5, s1 -; CHECK-NEXT: and s1, t4, s11 -; CHECK-NEXT: and a4, a4, ra -; CHECK-NEXT: xor a3, a3, s0 -; CHECK-NEXT: add a2, a2, s7 -; CHECK-NEXT: roriw s0, t6, 6 -; CHECK-NEXT: lw a1, -48(s10) -; CHECK-NEXT: xor a4, a4, s1 -; CHECK-NEXT: roriw s1, t6, 11 -; CHECK-NEXT: add s9, a2, a3 -; CHECK-NEXT: andn a2, t0, t6 -; CHECK-NEXT: add a1, a1, s3 -; CHECK-NEXT: and a3, a6, t6 -; CHECK-NEXT: add a4, a4, a5 -; CHECK-NEXT: xor s1, s1, s0 -; CHECK-NEXT: roriw a5, t6, 25 -; CHECK-NEXT: or a2, a2, a3 -; CHECK-NEXT: add a1, a1, s9 -; CHECK-NEXT: add s2, a4, a0 -; CHECK-NEXT: xor a5, a5, s1 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: roriw a0, s2, 2 -; CHECK-NEXT: roriw a2, s2, 13 -; CHECK-NEXT: add t5, a1, a5 -; CHECK-NEXT: roriw a7, s2, 22 -; CHECK-NEXT: xor a4, s2, t4 -; CHECK-NEXT: xor a0, a0, a2 -; CHECK-NEXT: ld t2, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw a2, t2, 7 -; CHECK-NEXT: and a5, s2, t4 -; CHECK-NEXT: roriw s1, t2, 18 +; CHECK-NEXT: roriw a2, s4, 18 +; CHECK-NEXT: add t6, t6, a4 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: xor a2, a2, a5 +; CHECK-NEXT: 
srliw a3, s4, 3 +; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: add s6, s6, t2 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: roriw a4, t6, 6 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: add a0, a0, s6 +; CHECK-NEXT: roriw a3, t6, 11 +; CHECK-NEXT: lw a5, -48(s10) +; CHECK-NEXT: andn a7, s9, t6 +; CHECK-NEXT: and s0, t1, t6 +; CHECK-NEXT: add t0, a0, a2 +; CHECK-NEXT: roriw a0, a1, 2 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: roriw a2, a1, 13 +; CHECK-NEXT: xor t4, a4, a3 +; CHECK-NEXT: xor a4, a1, a6 +; CHECK-NEXT: or a7, s0, a7 +; CHECK-NEXT: roriw t5, t6, 25 +; CHECK-NEXT: add a5, a5, t0 +; CHECK-NEXT: xor s2, a0, a2 +; CHECK-NEXT: roriw a2, a1, 22 ; CHECK-NEXT: and a4, a4, s11 -; CHECK-NEXT: roriw a3, t3, 17 -; CHECK-NEXT: xor a0, a0, a7 -; CHECK-NEXT: roriw a1, t3, 19 -; CHECK-NEXT: xor a2, a2, s1 -; CHECK-NEXT: srliw s0, t2, 3 -; CHECK-NEXT: xor a4, a4, a5 -; CHECK-NEXT: add ra, ra, t5 -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: srliw a3, t3, 10 -; CHECK-NEXT: xor a2, a2, s0 -; CHECK-NEXT: add s4, s4, t1 -; CHECK-NEXT: add a0, a0, a4 -; CHECK-NEXT: roriw a5, ra, 6 -; CHECK-NEXT: lw s0, -44(s10) -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: add a2, a2, s4 -; CHECK-NEXT: add t5, t5, a0 -; CHECK-NEXT: roriw a0, ra, 11 -; CHECK-NEXT: andn a3, a6, ra -; CHECK-NEXT: and a4, t6, ra -; CHECK-NEXT: add s5, a2, a1 -; CHECK-NEXT: add s0, s0, t0 -; CHECK-NEXT: mv s3, t0 -; CHECK-NEXT: sd t0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: xor a0, a0, a5 -; CHECK-NEXT: roriw a1, ra, 25 -; CHECK-NEXT: or a3, a3, a4 -; CHECK-NEXT: add s0, s0, s5 -; CHECK-NEXT: roriw a7, t5, 2 -; CHECK-NEXT: roriw a4, t5, 13 -; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: roriw a1, s9, 17 -; CHECK-NEXT: add a3, a3, s0 -; CHECK-NEXT: roriw a5, s9, 19 -; CHECK-NEXT: ld t1, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw s0, t1, 7 -; CHECK-NEXT: roriw a2, t1, 18 -; CHECK-NEXT: add a0, a0, a3 -; CHECK-NEXT: xor a3, t5, s2 -; CHECK-NEXT: xor a4, a7, a4 -; CHECK-NEXT: xor a7, a1, a5 
-; CHECK-NEXT: xor a2, a2, s0 -; CHECK-NEXT: srliw a5, t1, 3 -; CHECK-NEXT: and a3, a3, t4 -; CHECK-NEXT: roriw s0, t5, 22 -; CHECK-NEXT: and s1, t5, s2 -; CHECK-NEXT: srliw a1, s9, 10 -; CHECK-NEXT: xor a5, a5, a2 -; CHECK-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; CHECK-NEXT: add t2, t2, a2 +; CHECK-NEXT: and s0, a1, a6 +; CHECK-NEXT: sd s1, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw a3, s1, 17 +; CHECK-NEXT: roriw a0, s1, 19 +; CHECK-NEXT: xor t4, t4, t5 +; CHECK-NEXT: add a5, a5, a7 +; CHECK-NEXT: xor a7, s2, a2 ; CHECK-NEXT: xor a4, a4, s0 -; CHECK-NEXT: xor a3, a3, s1 -; CHECK-NEXT: xor a1, a7, a1 -; CHECK-NEXT: add a2, a0, s11 -; CHECK-NEXT: add a5, a5, t2 -; CHECK-NEXT: lw s1, -40(s10) -; CHECK-NEXT: add a3, a3, a4 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: roriw a3, s5, 7 +; CHECK-NEXT: srliw s0, s1, 10 +; CHECK-NEXT: roriw a2, s5, 18 +; CHECK-NEXT: add a5, a5, t4 +; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: xor s0, s0, a0 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: srliw a3, s5, 3 +; CHECK-NEXT: add s4, s4, t3 +; CHECK-NEXT: add a7, a5, ra +; CHECK-NEXT: add t4, a4, a5 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: add s0, s0, s4 +; CHECK-NEXT: roriw a3, a7, 6 +; CHECK-NEXT: lw a5, -44(s10) +; CHECK-NEXT: roriw a0, a7, 11 +; CHECK-NEXT: andn t5, t1, a7 +; CHECK-NEXT: and a4, t6, a7 +; CHECK-NEXT: add s7, s0, a2 +; CHECK-NEXT: sd s7, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw s2, a7, 25 +; CHECK-NEXT: add a5, a5, s9 +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: xor ra, a3, a0 +; CHECK-NEXT: roriw a3, t4, 2 +; CHECK-NEXT: or t5, a4, t5 +; CHECK-NEXT: roriw s0, t4, 13 +; CHECK-NEXT: add a5, a5, s7 +; CHECK-NEXT: sd t0, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw a2, t0, 17 +; CHECK-NEXT: roriw a0, t0, 19 +; CHECK-NEXT: xor a4, t4, a1 +; CHECK-NEXT: xor s2, ra, s2 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: xor ra, a3, s0 +; CHECK-NEXT: xor t5, a2, a0 +; CHECK-NEXT: roriw a2, t4, 22 +; CHECK-NEXT: and a4, a4, a6 +; 
CHECK-NEXT: and s0, t4, a1 +; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a0, s1, 7 +; CHECK-NEXT: srliw a3, t0, 10 +; CHECK-NEXT: roriw s3, s1, 18 +; CHECK-NEXT: add a5, a5, s2 +; CHECK-NEXT: xor a2, ra, a2 +; CHECK-NEXT: xor a4, a4, s0 +; CHECK-NEXT: xor t0, t5, a3 +; CHECK-NEXT: xor a0, a0, s3 +; CHECK-NEXT: srliw s0, s1, 3 +; CHECK-NEXT: ld t2, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t2, t2, s5 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: add a2, a5, s11 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: add t2, t2, t0 +; CHECK-NEXT: add t5, a4, a5 ; CHECK-NEXT: roriw a4, a2, 6 -; CHECK-NEXT: add s4, a5, a1 -; CHECK-NEXT: roriw a1, a2, 11 -; CHECK-NEXT: add a6, a6, s1 -; CHECK-NEXT: andn a5, t6, a2 -; CHECK-NEXT: and s1, ra, a2 -; CHECK-NEXT: add a7, a3, a0 -; CHECK-NEXT: xor a1, a1, a4 -; CHECK-NEXT: roriw a0, a2, 25 -; CHECK-NEXT: or a5, a5, s1 -; CHECK-NEXT: add a6, a6, s4 -; CHECK-NEXT: roriw a4, a7, 2 -; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: roriw a1, a7, 13 -; CHECK-NEXT: add a5, a5, a6 -; CHECK-NEXT: ld t0, 88(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw s1, t0, 7 -; CHECK-NEXT: roriw s0, t0, 18 -; CHECK-NEXT: add a6, a5, a0 -; CHECK-NEXT: xor a1, a1, a4 -; CHECK-NEXT: roriw a4, a7, 22 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: xor s1, a7, t5 -; CHECK-NEXT: roriw a0, s5, 17 -; CHECK-NEXT: roriw a5, s5, 19 -; CHECK-NEXT: srliw a3, t0, 3 -; CHECK-NEXT: xor a1, a1, a4 -; CHECK-NEXT: add t4, t4, a6 -; CHECK-NEXT: and a4, s1, s2 -; CHECK-NEXT: and s1, a7, t5 -; CHECK-NEXT: xor a0, a0, a5 -; CHECK-NEXT: xor a3, a3, s0 -; CHECK-NEXT: ld a5, 80(sp) # 8-byte Folded Reload -; CHECK-NEXT: add t1, t1, a5 -; CHECK-NEXT: srliw a5, s5, 10 +; CHECK-NEXT: lw a5, -40(s10) +; CHECK-NEXT: roriw s0, a2, 11 +; CHECK-NEXT: add s5, t2, a0 +; CHECK-NEXT: andn a0, t6, a2 +; CHECK-NEXT: and a3, a7, a2 +; CHECK-NEXT: add a5, a5, t1 +; CHECK-NEXT: roriw s2, a2, 25 +; CHECK-NEXT: xor s3, a4, s0 +; CHECK-NEXT: roriw ra, t5, 2 +; CHECK-NEXT: or 
a0, a0, a3 +; CHECK-NEXT: roriw t0, t5, 13 +; CHECK-NEXT: add a5, a5, s5 +; CHECK-NEXT: roriw a3, s7, 17 +; CHECK-NEXT: roriw a4, s7, 19 +; CHECK-NEXT: xor s0, t5, t4 +; CHECK-NEXT: xor s2, s3, s2 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: xor ra, ra, t0 +; CHECK-NEXT: xor s3, a3, a4 +; CHECK-NEXT: and t1, s0, a1 +; CHECK-NEXT: roriw s0, t5, 22 +; CHECK-NEXT: and s4, t5, t4 +; CHECK-NEXT: ld a3, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a3, 7 +; CHECK-NEXT: srliw a5, s7, 10 +; CHECK-NEXT: roriw t0, a3, 18 +; CHECK-NEXT: add a0, a0, s2 +; CHECK-NEXT: xor t2, ra, s0 +; CHECK-NEXT: xor s0, t1, s4 +; CHECK-NEXT: xor s2, s3, a5 +; CHECK-NEXT: xor a4, a4, t0 +; CHECK-NEXT: ld t1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t1, t1, s1 +; CHECK-NEXT: srliw s1, a3, 3 +; CHECK-NEXT: add a5, t2, s0 +; CHECK-NEXT: add ra, a0, a6 ; CHECK-NEXT: xor a4, a4, s1 -; CHECK-NEXT: roriw s0, t4, 6 -; CHECK-NEXT: roriw s1, t4, 11 -; CHECK-NEXT: xor a0, a0, a5 -; CHECK-NEXT: add a3, a3, t1 -; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: lw a4, -36(s10) -; CHECK-NEXT: add t2, a3, a0 -; CHECK-NEXT: roriw a3, t4, 25 -; CHECK-NEXT: andn a5, ra, t4 -; CHECK-NEXT: and s1, a2, t4 -; CHECK-NEXT: add t6, t6, a4 +; CHECK-NEXT: add t1, t1, s2 +; CHECK-NEXT: add t3, a5, a0 +; CHECK-NEXT: roriw a0, ra, 6 +; CHECK-NEXT: lw s1, -36(s10) +; CHECK-NEXT: roriw s0, ra, 11 +; CHECK-NEXT: add s6, t1, a4 +; CHECK-NEXT: andn t1, a7, ra +; CHECK-NEXT: and a4, a2, ra +; CHECK-NEXT: add t6, t6, s1 +; CHECK-NEXT: roriw a6, ra, 25 +; CHECK-NEXT: xor t0, a0, s0 +; CHECK-NEXT: roriw s2, t3, 2 +; CHECK-NEXT: or s3, a4, t1 +; CHECK-NEXT: roriw a4, t3, 13 +; CHECK-NEXT: add t6, t6, s6 +; CHECK-NEXT: roriw s1, s5, 17 +; CHECK-NEXT: xor a0, t3, t5 +; CHECK-NEXT: roriw s0, s5, 19 +; CHECK-NEXT: xor a6, t0, a6 +; CHECK-NEXT: add t6, t6, s3 +; CHECK-NEXT: xor s2, s2, a4 +; CHECK-NEXT: xor t0, s1, s0 +; CHECK-NEXT: and s3, a0, t4 +; CHECK-NEXT: roriw s1, t3, 22 +; CHECK-NEXT: and 
t1, t3, t5 +; CHECK-NEXT: srliw s0, s5, 10 ; CHECK-NEXT: roriw a4, s8, 7 -; CHECK-NEXT: add a0, a1, a6 -; CHECK-NEXT: roriw a1, s8, 18 -; CHECK-NEXT: xor a6, s0, a3 -; CHECK-NEXT: or a5, a5, s1 -; CHECK-NEXT: add t6, t6, t2 +; CHECK-NEXT: roriw a0, s8, 18 +; CHECK-NEXT: add a6, a6, t6 +; CHECK-NEXT: xor t2, s2, s1 +; CHECK-NEXT: xor s1, s3, t1 +; CHECK-NEXT: xor s0, t0, s0 +; CHECK-NEXT: xor a0, a0, a4 +; CHECK-NEXT: srliw a4, s8, 3 +; CHECK-NEXT: add t0, a6, a1 +; CHECK-NEXT: add a1, a3, s9 +; CHECK-NEXT: add a3, t2, s1 +; CHECK-NEXT: xor a0, a0, a4 +; CHECK-NEXT: roriw a4, t0, 6 +; CHECK-NEXT: add a1, a1, s0 +; CHECK-NEXT: roriw s0, t0, 11 +; CHECK-NEXT: add s3, a3, a6 +; CHECK-NEXT: lw a3, -32(s10) +; CHECK-NEXT: add s9, a1, a0 +; CHECK-NEXT: xor a6, a4, s0 +; CHECK-NEXT: roriw t6, t0, 25 +; CHECK-NEXT: andn s2, a2, t0 +; CHECK-NEXT: and s0, ra, t0 +; CHECK-NEXT: add a3, a3, s9 +; CHECK-NEXT: ld t1, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, t1, 7 +; CHECK-NEXT: roriw a0, t1, 18 +; CHECK-NEXT: xor a6, a6, t6 +; CHECK-NEXT: or s0, s0, s2 +; CHECK-NEXT: add a3, a3, a7 +; CHECK-NEXT: roriw s1, s3, 2 +; CHECK-NEXT: xor a7, a4, a0 +; CHECK-NEXT: roriw a4, s3, 13 +; CHECK-NEXT: xor a1, s3, t3 +; CHECK-NEXT: srliw a0, t1, 3 +; CHECK-NEXT: add a3, a3, s0 +; CHECK-NEXT: roriw s0, s6, 17 +; CHECK-NEXT: xor t6, s1, a4 +; CHECK-NEXT: roriw s1, s6, 19 +; CHECK-NEXT: xor a0, a7, a0 +; CHECK-NEXT: and a7, a1, t5 +; CHECK-NEXT: roriw a4, s3, 22 +; CHECK-NEXT: and a1, s3, t3 +; CHECK-NEXT: add a3, a3, a6 +; CHECK-NEXT: xor s0, s0, s1 +; CHECK-NEXT: add a0, a0, s11 +; CHECK-NEXT: srliw s1, s6, 10 +; CHECK-NEXT: xor a4, t6, a4 +; CHECK-NEXT: xor a1, a7, a1 +; CHECK-NEXT: add t4, t4, a3 +; CHECK-NEXT: xor s0, s0, s1 +; CHECK-NEXT: add a0, a0, s8 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: roriw s1, t4, 6 +; CHECK-NEXT: lw a4, -28(s10) +; CHECK-NEXT: add s4, a0, s0 +; CHECK-NEXT: add s11, a1, a3 +; CHECK-NEXT: roriw a0, t4, 11 +; CHECK-NEXT: andn a3, ra, t4 +; 
CHECK-NEXT: and s0, t0, t4 +; CHECK-NEXT: add a4, a4, s4 +; CHECK-NEXT: roriw a6, t4, 25 +; CHECK-NEXT: xor a7, s1, a0 +; CHECK-NEXT: roriw t6, s11, 2 +; CHECK-NEXT: or s2, s0, a3 +; CHECK-NEXT: roriw s0, s11, 13 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: ld a5, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a5, 7 +; CHECK-NEXT: roriw a0, a5, 18 +; CHECK-NEXT: xor a7, a7, a6 +; CHECK-NEXT: xor a3, s11, s3 +; CHECK-NEXT: add a2, a2, s2 +; CHECK-NEXT: xor a6, t6, s0 +; CHECK-NEXT: xor t6, a4, a0 +; CHECK-NEXT: roriw a4, s11, 22 +; CHECK-NEXT: and a3, a3, t3 +; CHECK-NEXT: srliw a1, a5, 3 +; CHECK-NEXT: and s0, s11, s3 +; CHECK-NEXT: roriw s1, s9, 17 +; CHECK-NEXT: add a2, a2, a7 +; CHECK-NEXT: roriw a0, s9, 19 +; CHECK-NEXT: xor a1, t6, a1 +; CHECK-NEXT: xor a4, a6, a4 +; CHECK-NEXT: xor a3, a3, s0 +; CHECK-NEXT: add s8, a2, t5 +; CHECK-NEXT: xor a0, a0, s1 +; CHECK-NEXT: srliw s1, s9, 10 +; CHECK-NEXT: add a1, a1, t1 +; CHECK-NEXT: add a3, a3, a4 +; CHECK-NEXT: roriw a4, s8, 6 +; CHECK-NEXT: xor s1, s1, a0 +; CHECK-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: roriw a0, s8, 11 +; CHECK-NEXT: add t6, a3, a2 +; CHECK-NEXT: lw a2, -24(s10) +; CHECK-NEXT: add t1, a1, s1 +; CHECK-NEXT: xor a6, a4, a0 +; CHECK-NEXT: roriw a7, s8, 25 +; CHECK-NEXT: andn t5, t0, s8 +; CHECK-NEXT: and s1, t4, s8 +; CHECK-NEXT: add a2, a2, t1 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a0, s0, 7 +; CHECK-NEXT: roriw a1, s0, 18 +; CHECK-NEXT: xor a6, a6, a7 +; CHECK-NEXT: or s1, s1, t5 +; CHECK-NEXT: add a2, a2, ra +; CHECK-NEXT: roriw a3, t6, 2 +; CHECK-NEXT: xor a7, a0, a1 +; CHECK-NEXT: roriw a1, t6, 13 +; CHECK-NEXT: xor a4, t6, s11 +; CHECK-NEXT: srliw a0, s0, 3 +; CHECK-NEXT: add s2, a2, s1 ; CHECK-NEXT: roriw s1, s4, 17 -; CHECK-NEXT: xor a1, a1, a4 -; CHECK-NEXT: roriw a4, s4, 19 -; CHECK-NEXT: roriw s0, a0, 2 -; CHECK-NEXT: srliw a3, s8, 3 -; CHECK-NEXT: add a5, a5, t6 -; CHECK-NEXT: roriw t6, a0, 13 -; 
CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: xor t5, a3, a1 +; CHECK-NEXT: roriw a3, s4, 19 +; CHECK-NEXT: xor a0, a7, a0 +; CHECK-NEXT: and a4, a4, s3 +; CHECK-NEXT: roriw a1, t6, 22 +; CHECK-NEXT: and a2, t6, s11 +; CHECK-NEXT: add a6, a6, s2 +; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: add a0, a0, a5 ; CHECK-NEXT: srliw s1, s4, 10 -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: add t0, t0, s3 -; CHECK-NEXT: add a5, a5, a6 -; CHECK-NEXT: xor a3, s0, t6 -; CHECK-NEXT: xor a4, a4, s1 -; CHECK-NEXT: xor s1, a0, a7 -; CHECK-NEXT: add t0, t0, a1 -; CHECK-NEXT: roriw s0, a0, 22 -; CHECK-NEXT: add s2, s2, a5 -; CHECK-NEXT: and s1, s1, t5 -; CHECK-NEXT: and a1, a0, a7 -; CHECK-NEXT: xor a6, a3, s0 -; CHECK-NEXT: add t1, t0, a4 -; CHECK-NEXT: sd t1, 88(sp) # 8-byte Folded Spill -; CHECK-NEXT: roriw t6, s2, 6 -; CHECK-NEXT: lw s0, -32(s10) -; CHECK-NEXT: xor a1, a1, s1 -; CHECK-NEXT: roriw s1, s2, 11 -; CHECK-NEXT: andn a3, a2, s2 -; CHECK-NEXT: and a4, t4, s2 -; CHECK-NEXT: add s0, s0, t1 +; CHECK-NEXT: xor a1, t5, a1 +; CHECK-NEXT: xor a2, a2, a4 +; CHECK-NEXT: add a7, a6, t3 +; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: ld a4, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: roriw a2, a7, 6 +; CHECK-NEXT: lw a4, -20(s10) +; CHECK-NEXT: add t3, a0, a3 ; CHECK-NEXT: add a1, a1, a6 -; CHECK-NEXT: xor s1, t6, s1 -; CHECK-NEXT: or a3, a3, a4 -; CHECK-NEXT: add s0, s0, ra -; CHECK-NEXT: roriw a4, s2, 25 -; CHECK-NEXT: add a6, a1, a5 -; CHECK-NEXT: xor a4, a4, s1 -; CHECK-NEXT: add a3, a3, s0 -; CHECK-NEXT: mv s7, s6 -; CHECK-NEXT: ld s6, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw a5, s6, 7 -; CHECK-NEXT: roriw s1, s6, 18 -; CHECK-NEXT: add a3, a3, a4 -; CHECK-NEXT: roriw ra, a6, 2 -; CHECK-NEXT: roriw s0, a6, 13 -; CHECK-NEXT: xor a1, a6, a0 -; CHECK-NEXT: xor t6, a5, s1 -; CHECK-NEXT: roriw s1, t2, 17 -; CHECK-NEXT: roriw a5, t2, 19 -; CHECK-NEXT: srliw a4, s6, 3 -; CHECK-NEXT: xor s0, ra, s0 -; CHECK-NEXT: and ra, a1, 
a7 -; CHECK-NEXT: roriw a1, a6, 22 -; CHECK-NEXT: and s3, a6, a0 -; CHECK-NEXT: xor a5, a5, s1 -; CHECK-NEXT: xor a4, t6, a4 -; CHECK-NEXT: add s8, s8, s11 -; CHECK-NEXT: srliw s1, t2, 10 -; CHECK-NEXT: xor a1, a1, s0 -; CHECK-NEXT: xor s0, ra, s3 -; CHECK-NEXT: add s11, a3, t5 -; CHECK-NEXT: xor a5, a5, s1 -; CHECK-NEXT: add a4, a4, s8 -; CHECK-NEXT: add a1, a1, s0 -; CHECK-NEXT: roriw s1, s11, 6 -; CHECK-NEXT: lw s0, -28(s10) -; CHECK-NEXT: add s8, a4, a5 -; CHECK-NEXT: add a4, a1, a3 -; CHECK-NEXT: roriw a1, s11, 11 -; CHECK-NEXT: andn a3, t4, s11 -; CHECK-NEXT: and a5, s2, s11 -; CHECK-NEXT: add s0, s0, s8 -; CHECK-NEXT: roriw t5, s11, 25 -; CHECK-NEXT: xor t6, s1, a1 -; CHECK-NEXT: roriw s3, a4, 2 +; CHECK-NEXT: roriw a0, a7, 11 +; CHECK-NEXT: andn a3, t4, a7 +; CHECK-NEXT: and a5, s8, a7 +; CHECK-NEXT: add a4, a4, t3 +; CHECK-NEXT: roriw a6, a7, 25 +; CHECK-NEXT: xor t5, a2, a0 +; CHECK-NEXT: roriw s2, a1, 2 ; CHECK-NEXT: or ra, a5, a3 -; CHECK-NEXT: roriw a5, a4, 13 -; CHECK-NEXT: add a2, a2, s0 -; CHECK-NEXT: ld t0, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw s0, t0, 7 -; CHECK-NEXT: roriw a1, t0, 18 -; CHECK-NEXT: xor t6, t6, t5 -; CHECK-NEXT: xor a3, a4, a6 -; CHECK-NEXT: add a2, a2, ra -; CHECK-NEXT: xor t5, s3, a5 -; CHECK-NEXT: xor s3, s0, a1 -; CHECK-NEXT: roriw s0, a4, 22 -; CHECK-NEXT: and ra, a3, a0 -; CHECK-NEXT: srliw a5, t0, 3 -; CHECK-NEXT: and s1, a4, a6 -; CHECK-NEXT: roriw a1, t1, 17 -; CHECK-NEXT: add a2, a2, t6 +; CHECK-NEXT: roriw a5, a1, 13 +; CHECK-NEXT: add a4, a4, t0 +; CHECK-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, a2, 7 +; CHECK-NEXT: roriw a0, a2, 18 +; CHECK-NEXT: xor t0, t5, a6 +; CHECK-NEXT: xor a3, a1, t6 +; CHECK-NEXT: add a4, a4, ra +; CHECK-NEXT: xor a6, s2, a5 +; CHECK-NEXT: xor t5, s1, a0 +; CHECK-NEXT: roriw s1, a1, 22 +; CHECK-NEXT: and s2, a3, s11 +; CHECK-NEXT: srliw a5, a2, 3 +; CHECK-NEXT: mv ra, a2 +; CHECK-NEXT: and a2, a1, t6 +; CHECK-NEXT: roriw a0, t1, 17 +; CHECK-NEXT: add a4, 
a4, t0 ; CHECK-NEXT: roriw a3, t1, 19 -; CHECK-NEXT: xor a5, s3, a5 -; CHECK-NEXT: xor s0, t5, s0 -; CHECK-NEXT: xor s1, ra, s1 -; CHECK-NEXT: add a7, a7, a2 -; CHECK-NEXT: xor a1, a1, a3 +; CHECK-NEXT: xor a5, t5, a5 +; CHECK-NEXT: xor s1, a6, s1 +; CHECK-NEXT: xor a2, s2, a2 +; CHECK-NEXT: add s3, s3, a4 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: sd t1, 56(sp) # 8-byte Folded Spill ; CHECK-NEXT: srliw a3, t1, 10 -; CHECK-NEXT: add a5, a5, s6 -; CHECK-NEXT: add s0, s0, s1 -; CHECK-NEXT: roriw s1, a7, 6 -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: sd t3, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: add a5, a5, t3 -; CHECK-NEXT: roriw a3, a7, 11 -; CHECK-NEXT: add ra, s0, a2 -; CHECK-NEXT: lw s0, -24(s10) -; CHECK-NEXT: add s6, a5, a1 -; CHECK-NEXT: xor t5, s1, a3 -; CHECK-NEXT: roriw t6, a7, 25 -; CHECK-NEXT: andn s3, s2, a7 -; CHECK-NEXT: and s1, s11, a7 -; CHECK-NEXT: add s0, s0, s6 -; CHECK-NEXT: roriw a3, s7, 7 -; CHECK-NEXT: roriw a1, s7, 18 -; CHECK-NEXT: xor t5, t5, t6 -; CHECK-NEXT: or s1, s1, s3 -; CHECK-NEXT: add t4, t4, s0 -; CHECK-NEXT: roriw s0, ra, 2 -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: roriw a3, ra, 13 -; CHECK-NEXT: xor a2, ra, a4 -; CHECK-NEXT: srliw a5, s7, 3 -; CHECK-NEXT: add t4, t4, s1 -; CHECK-NEXT: roriw s1, s8, 17 -; CHECK-NEXT: xor t6, s0, a3 -; CHECK-NEXT: roriw s0, s8, 19 -; CHECK-NEXT: xor a1, a1, a5 -; CHECK-NEXT: and a2, a2, a6 -; CHECK-NEXT: roriw a5, ra, 22 -; CHECK-NEXT: and a3, ra, a4 -; CHECK-NEXT: add t4, t4, t5 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: add a1, a1, t0 -; CHECK-NEXT: mv t0, t2 -; CHECK-NEXT: mv t1, s4 -; CHECK-NEXT: mv s4, s5 -; CHECK-NEXT: srliw s1, s8, 10 -; CHECK-NEXT: xor a5, t6, a5 -; CHECK-NEXT: xor a2, a2, a3 -; CHECK-NEXT: add a0, a0, t4 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: add a1, a1, s9 -; CHECK-NEXT: add a2, a2, a5 -; CHECK-NEXT: roriw a3, a0, 6 -; CHECK-NEXT: lw s1, -20(s10) -; CHECK-NEXT: add t3, a1, s0 -; CHECK-NEXT: add a5, a2, t4 -; CHECK-NEXT: roriw a1, a0, 11 -; CHECK-NEXT: 
andn a2, s11, a0 -; CHECK-NEXT: and s0, a7, a0 -; CHECK-NEXT: add s1, s1, t3 -; CHECK-NEXT: roriw t4, a0, 25 -; CHECK-NEXT: xor t5, a3, a1 -; CHECK-NEXT: roriw t6, a5, 2 -; CHECK-NEXT: or s3, s0, a2 -; CHECK-NEXT: roriw s0, a5, 13 -; CHECK-NEXT: add s2, s2, s1 -; CHECK-NEXT: ld a2, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw a1, a2, 7 -; CHECK-NEXT: roriw a3, a2, 18 -; CHECK-NEXT: xor t5, t5, t4 -; CHECK-NEXT: xor s1, a5, ra -; CHECK-NEXT: add s2, s2, s3 -; CHECK-NEXT: xor t4, t6, s0 -; CHECK-NEXT: xor t6, a1, a3 -; CHECK-NEXT: roriw s3, a5, 22 -; CHECK-NEXT: and s1, s1, a4 -; CHECK-NEXT: srliw s0, a2, 3 -; CHECK-NEXT: mv s5, a2 -; CHECK-NEXT: and a2, a5, ra -; CHECK-NEXT: roriw a1, s6, 17 -; CHECK-NEXT: add t5, t5, s2 -; CHECK-NEXT: roriw a3, s6, 19 -; CHECK-NEXT: xor s0, t6, s0 -; CHECK-NEXT: xor t4, t4, s3 -; CHECK-NEXT: xor a2, a2, s1 -; CHECK-NEXT: add a6, a6, t5 -; CHECK-NEXT: xor a1, a1, a3 -; CHECK-NEXT: sd s6, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: srliw a3, s6, 10 -; CHECK-NEXT: add s0, s0, s7 -; CHECK-NEXT: mv s7, s9 -; CHECK-NEXT: add a2, a2, t4 -; CHECK-NEXT: roriw s1, a6, 6 -; CHECK-NEXT: xor a3, a3, a1 -; CHECK-NEXT: add s0, s0, s4 -; CHECK-NEXT: roriw a1, a6, 11 -; CHECK-NEXT: add t5, t5, a2 +; CHECK-NEXT: add a5, a5, s0 +; CHECK-NEXT: add a2, a2, s1 +; CHECK-NEXT: roriw s1, s3, 6 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: add a5, a5, s7 +; CHECK-NEXT: roriw a3, s3, 11 +; CHECK-NEXT: add t1, a2, a4 ; CHECK-NEXT: lw a2, -16(s10) -; CHECK-NEXT: add t2, s0, a3 -; CHECK-NEXT: xor t4, s1, a1 -; CHECK-NEXT: roriw t6, a6, 25 -; CHECK-NEXT: andn s2, a7, a6 -; CHECK-NEXT: and s1, a0, a6 -; CHECK-NEXT: add a2, a2, t2 -; CHECK-NEXT: ld s0, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw a1, s0, 7 -; CHECK-NEXT: roriw a3, s0, 18 -; CHECK-NEXT: xor t4, t4, t6 -; CHECK-NEXT: or s3, s1, s2 -; CHECK-NEXT: add a2, a2, s11 -; CHECK-NEXT: roriw s1, t5, 2 -; CHECK-NEXT: xor t6, a1, a3 -; CHECK-NEXT: roriw a3, t5, 13 -; CHECK-NEXT: xor s2, t5, a5 -; 
CHECK-NEXT: srliw a1, s0, 3 -; CHECK-NEXT: mv s9, s0 -; CHECK-NEXT: add s11, a2, s3 -; CHECK-NEXT: roriw s0, t3, 17 -; CHECK-NEXT: xor s3, s1, a3 +; CHECK-NEXT: add s7, a5, a0 +; CHECK-NEXT: xor a6, s1, a3 +; CHECK-NEXT: roriw a0, s3, 25 +; CHECK-NEXT: andn a4, s8, s3 +; CHECK-NEXT: and a5, a7, s3 +; CHECK-NEXT: add a2, a2, s7 +; CHECK-NEXT: ld t2, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, t2, 7 +; CHECK-NEXT: roriw a3, t2, 18 +; CHECK-NEXT: xor a6, a6, a0 +; CHECK-NEXT: or a4, a4, a5 +; CHECK-NEXT: add a2, a2, t4 +; CHECK-NEXT: roriw a5, t1, 2 +; CHECK-NEXT: xor t0, s1, a3 +; CHECK-NEXT: roriw s1, t1, 13 +; CHECK-NEXT: xor a0, t1, a1 +; CHECK-NEXT: srliw a3, t2, 3 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: roriw a4, t3, 17 +; CHECK-NEXT: xor t4, a5, s1 ; CHECK-NEXT: roriw s1, t3, 19 -; CHECK-NEXT: xor a1, t6, a1 -; CHECK-NEXT: and t6, s2, ra -; CHECK-NEXT: roriw a2, t5, 22 -; CHECK-NEXT: and a3, t5, a5 -; CHECK-NEXT: add t4, t4, s11 -; CHECK-NEXT: xor s1, s1, s0 -; CHECK-NEXT: add a1, a1, s5 +; CHECK-NEXT: xor a3, t0, a3 +; CHECK-NEXT: and t0, a0, t6 +; CHECK-NEXT: roriw a5, t1, 22 +; CHECK-NEXT: and a0, t1, a1 +; CHECK-NEXT: add a2, a2, a6 +; CHECK-NEXT: xor s1, s1, a4 +; CHECK-NEXT: add a3, a3, ra ; CHECK-NEXT: sd t3, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: srliw s0, t3, 10 -; CHECK-NEXT: xor a2, s3, a2 -; CHECK-NEXT: xor a3, t6, a3 -; CHECK-NEXT: add s3, t4, a4 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: sd t1, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: add a1, a1, t1 -; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: roriw a3, s3, 6 +; CHECK-NEXT: srliw a4, t3, 10 +; CHECK-NEXT: xor a5, t4, a5 +; CHECK-NEXT: xor a0, t0, a0 +; CHECK-NEXT: add s11, s11, a2 +; CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: add a3, a3, s5 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: roriw a5, s11, 6 ; CHECK-NEXT: lw s1, -12(s10) -; CHECK-NEXT: add s5, a1, s0 -; CHECK-NEXT: add a4, a2, t4 -; CHECK-NEXT: roriw a1, s3, 11 -; CHECK-NEXT: andn a2, a0, s3 -; CHECK-NEXT: and s0, 
a6, s3 -; CHECK-NEXT: add s1, s1, s5 -; CHECK-NEXT: roriw t4, s3, 25 -; CHECK-NEXT: xor t6, a3, a1 -; CHECK-NEXT: roriw s2, a4, 2 -; CHECK-NEXT: or s11, s0, a2 -; CHECK-NEXT: roriw s0, a4, 13 -; CHECK-NEXT: add a7, a7, s1 -; CHECK-NEXT: ld s6, 96(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw s1, s6, 7 -; CHECK-NEXT: roriw a1, s6, 18 -; CHECK-NEXT: xor t6, t6, t4 -; CHECK-NEXT: xor a2, a4, t5 -; CHECK-NEXT: add a7, a7, s11 -; CHECK-NEXT: xor t4, s2, s0 -; CHECK-NEXT: xor s2, s1, a1 -; CHECK-NEXT: roriw s1, a4, 22 -; CHECK-NEXT: and s11, a2, a5 -; CHECK-NEXT: srliw s0, s6, 3 -; CHECK-NEXT: and a3, a4, t5 -; CHECK-NEXT: roriw a1, t2, 17 -; CHECK-NEXT: add a7, a7, t6 -; CHECK-NEXT: roriw a2, t2, 19 -; CHECK-NEXT: xor t6, s2, s0 -; CHECK-NEXT: xor s1, t4, s1 -; CHECK-NEXT: xor s0, s11, a3 -; CHECK-NEXT: add a3, a7, ra -; CHECK-NEXT: xor a1, a1, a2 -; CHECK-NEXT: mv t1, t2 -; CHECK-NEXT: srliw a2, t2, 10 -; CHECK-NEXT: add t3, s9, t6 -; CHECK-NEXT: add s0, s0, s1 -; CHECK-NEXT: roriw t4, a3, 6 -; CHECK-NEXT: xor a1, a1, a2 -; CHECK-NEXT: sd t0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: add t3, t3, t0 -; CHECK-NEXT: roriw s1, a3, 11 -; CHECK-NEXT: add a2, s0, a7 -; CHECK-NEXT: lw s0, -8(s10) -; CHECK-NEXT: add t3, t3, a1 -; CHECK-NEXT: xor a7, t4, s1 -; CHECK-NEXT: roriw t4, a3, 25 -; CHECK-NEXT: andn t6, a6, a3 -; CHECK-NEXT: and s11, s3, a3 -; CHECK-NEXT: add s2, t3, s0 -; CHECK-NEXT: ld t0, 80(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw a1, t0, 7 -; CHECK-NEXT: roriw s0, t0, 18 -; CHECK-NEXT: xor a7, a7, t4 -; CHECK-NEXT: or s11, s11, t6 -; CHECK-NEXT: add a0, a0, s2 -; CHECK-NEXT: roriw t4, a2, 2 -; CHECK-NEXT: xor t6, a1, s0 -; CHECK-NEXT: roriw s0, a2, 13 -; CHECK-NEXT: xor s2, a2, a4 -; CHECK-NEXT: srliw a1, t0, 3 -; CHECK-NEXT: add s11, s11, a0 -; CHECK-NEXT: roriw ra, s5, 17 -; CHECK-NEXT: xor t4, t4, s0 -; CHECK-NEXT: roriw s0, s5, 19 -; CHECK-NEXT: xor a1, t6, a1 -; CHECK-NEXT: and t6, s2, t5 -; CHECK-NEXT: roriw s2, a2, 22 -; CHECK-NEXT: and s1, a2, a4 
-; CHECK-NEXT: add a7, a7, s11 -; CHECK-NEXT: xor s0, ra, s0 -; CHECK-NEXT: add s11, a1, s6 -; CHECK-NEXT: srliw a0, s5, 10 -; CHECK-NEXT: xor a1, t4, s2 -; CHECK-NEXT: xor s1, t6, s1 -; CHECK-NEXT: add t4, a7, a5 -; CHECK-NEXT: xor a0, a0, s0 -; CHECK-NEXT: ld a5, 88(sp) # 8-byte Folded Reload -; CHECK-NEXT: add s11, s11, a5 -; CHECK-NEXT: add a1, a1, s1 -; CHECK-NEXT: roriw s1, t4, 6 -; CHECK-NEXT: lw s0, -4(s10) -; CHECK-NEXT: add a0, a0, s11 -; CHECK-NEXT: add s2, a1, a7 -; CHECK-NEXT: roriw a1, t4, 11 -; CHECK-NEXT: andn a7, s3, t4 -; CHECK-NEXT: and a5, a3, t4 -; CHECK-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; CHECK-NEXT: add s0, s0, a0 -; CHECK-NEXT: roriw t6, t4, 25 -; CHECK-NEXT: xor s9, s1, a1 -; CHECK-NEXT: roriw s11, s2, 2 -; CHECK-NEXT: or a5, a5, a7 -; CHECK-NEXT: roriw a7, s2, 13 -; CHECK-NEXT: add a6, a6, s0 -; CHECK-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: roriw s0, a0, 7 -; CHECK-NEXT: roriw s1, a0, 18 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: xor t6, s9, t6 -; CHECK-NEXT: add a6, a6, a5 -; CHECK-NEXT: xor a0, s2, a2 -; CHECK-NEXT: xor a7, s11, a7 -; CHECK-NEXT: xor s0, s0, s1 -; CHECK-NEXT: roriw s11, s2, 22 -; CHECK-NEXT: srliw a5, a1, 3 -; CHECK-NEXT: and s9, a0, a4 -; CHECK-NEXT: roriw a1, t3, 17 -; CHECK-NEXT: and s1, s2, a2 -; CHECK-NEXT: roriw a0, t3, 19 -; CHECK-NEXT: xor a5, a5, s0 -; CHECK-NEXT: add a6, a6, t6 -; CHECK-NEXT: xor s0, a7, s11 -; CHECK-NEXT: xor s1, s9, s1 -; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: srliw a1, t3, 10 -; CHECK-NEXT: add a5, a5, t0 -; CHECK-NEXT: add t5, t5, a6 -; CHECK-NEXT: add t6, s0, s1 -; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: add a5, a5, s8 -; CHECK-NEXT: add t6, t6, a6 -; CHECK-NEXT: roriw a1, t5, 6 -; CHECK-NEXT: lw s1, 0(s10) -; CHECK-NEXT: add a7, a5, a0 -; CHECK-NEXT: roriw a0, t5, 11 -; CHECK-NEXT: andn a3, a3, t5 -; CHECK-NEXT: and a5, t4, t5 -; CHECK-NEXT: add s1, s1, a7 -; CHECK-NEXT: roriw s0, t6, 2 -; CHECK-NEXT: xor a6, a1, a0 -; CHECK-NEXT: roriw a1, t6, 13 -; 
CHECK-NEXT: or a3, a3, a5 -; CHECK-NEXT: xor a5, t6, s2 +; CHECK-NEXT: add s2, a3, a4 +; CHECK-NEXT: add ra, a0, a2 +; CHECK-NEXT: roriw a0, s11, 11 +; CHECK-NEXT: andn a3, a7, s11 +; CHECK-NEXT: and a4, s3, s11 +; CHECK-NEXT: add s1, s1, s2 +; CHECK-NEXT: roriw a6, s11, 25 +; CHECK-NEXT: xor t0, a5, a0 +; CHECK-NEXT: roriw t4, ra, 2 +; CHECK-NEXT: or t5, a4, a3 +; CHECK-NEXT: roriw a4, ra, 13 +; CHECK-NEXT: add s0, s1, s8 +; CHECK-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, s8, 7 +; CHECK-NEXT: roriw a0, s8, 18 +; CHECK-NEXT: xor a5, t0, a6 +; CHECK-NEXT: xor a3, ra, t1 +; CHECK-NEXT: add t5, t5, s0 +; CHECK-NEXT: xor a6, t4, a4 +; CHECK-NEXT: xor a0, a0, s1 +; CHECK-NEXT: roriw s1, ra, 22 +; CHECK-NEXT: and t0, a3, a1 +; CHECK-NEXT: srliw s0, s8, 3 +; CHECK-NEXT: and a2, ra, t1 +; CHECK-NEXT: roriw a4, s7, 17 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: roriw a3, s7, 19 +; CHECK-NEXT: xor s0, s0, a0 +; CHECK-NEXT: xor s1, a6, s1 +; CHECK-NEXT: xor a2, t0, a2 +; CHECK-NEXT: add a0, a5, t6 +; CHECK-NEXT: xor a3, a3, a4 +; CHECK-NEXT: srliw a4, s7, 10 +; CHECK-NEXT: add t3, t2, s0 +; CHECK-NEXT: add a2, a2, s1 +; CHECK-NEXT: roriw s1, a0, 6 +; CHECK-NEXT: xor a3, a3, a4 +; CHECK-NEXT: sd s6, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: add t3, t3, s6 +; CHECK-NEXT: mv s6, s2 +; CHECK-NEXT: roriw a4, a0, 11 +; CHECK-NEXT: add s0, a2, a5 +; CHECK-NEXT: lw a2, -8(s10) +; CHECK-NEXT: add t3, t3, a3 +; CHECK-NEXT: xor a6, s1, a4 +; CHECK-NEXT: roriw t0, a0, 25 +; CHECK-NEXT: andn t4, s3, a0 +; CHECK-NEXT: and s1, s11, a0 +; CHECK-NEXT: add a2, a2, t3 +; CHECK-NEXT: ld a5, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a5, 7 +; CHECK-NEXT: roriw a3, a5, 18 +; CHECK-NEXT: xor a6, a6, t0 +; CHECK-NEXT: or s1, s1, t4 +; CHECK-NEXT: add a2, a2, a7 +; CHECK-NEXT: roriw t4, s0, 2 +; CHECK-NEXT: xor a7, a4, a3 +; CHECK-NEXT: roriw a4, s0, 13 +; CHECK-NEXT: xor t0, s0, ra +; CHECK-NEXT: srliw a3, a5, 3 +; CHECK-NEXT: mv t6, a5 +; CHECK-NEXT: add 
t5, a2, s1 +; CHECK-NEXT: roriw s1, s2, 17 +; CHECK-NEXT: xor t4, t4, a4 +; CHECK-NEXT: roriw a5, s2, 19 +; CHECK-NEXT: xor a3, a7, a3 +; CHECK-NEXT: and a7, t0, t1 +; CHECK-NEXT: roriw a2, s0, 22 +; CHECK-NEXT: and a4, s0, ra +; CHECK-NEXT: add a6, a6, t5 +; CHECK-NEXT: xor s1, s1, a5 +; CHECK-NEXT: add a3, a3, s8 +; CHECK-NEXT: mv s8, s4 +; CHECK-NEXT: srliw a5, s2, 10 +; CHECK-NEXT: xor a2, t4, a2 +; CHECK-NEXT: xor a4, a7, a4 +; CHECK-NEXT: add a7, a6, a1 +; CHECK-NEXT: xor a5, a5, s1 +; CHECK-NEXT: sd s9, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: add a3, a3, s9 +; CHECK-NEXT: ld t2, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: roriw a4, a7, 6 +; CHECK-NEXT: lw s1, -4(s10) +; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: add s2, a2, a6 +; CHECK-NEXT: roriw a2, a7, 11 +; CHECK-NEXT: andn a5, s11, a7 +; CHECK-NEXT: and a1, a0, a7 +; CHECK-NEXT: sd a3, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: add s1, s1, a3 +; CHECK-NEXT: roriw a6, a7, 25 +; CHECK-NEXT: xor t0, a4, a2 +; CHECK-NEXT: roriw t4, s2, 2 +; CHECK-NEXT: or a1, a1, a5 +; CHECK-NEXT: roriw t5, s2, 13 ; CHECK-NEXT: add s1, s1, s3 -; CHECK-NEXT: roriw a0, t5, 25 -; CHECK-NEXT: xor a1, a1, s0 -; CHECK-NEXT: roriw s0, t6, 22 -; CHECK-NEXT: and a2, a2, a5 -; CHECK-NEXT: and a5, t6, s2 -; CHECK-NEXT: xor a0, a6, a0 -; CHECK-NEXT: add a3, a3, s1 -; CHECK-NEXT: xor a1, a1, s0 +; CHECK-NEXT: ld a3, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a2, a3, 7 +; CHECK-NEXT: roriw a4, a3, 18 +; CHECK-NEXT: mv a5, a3 +; CHECK-NEXT: xor a6, t0, a6 +; CHECK-NEXT: add t0, s1, a1 +; CHECK-NEXT: xor a3, s2, s0 +; CHECK-NEXT: xor t4, t4, t5 +; CHECK-NEXT: xor a2, a2, a4 +; CHECK-NEXT: roriw t5, s2, 22 +; CHECK-NEXT: srliw a5, a5, 3 +; CHECK-NEXT: and a3, a3, ra +; CHECK-NEXT: roriw a1, t3, 17 +; CHECK-NEXT: and s1, s2, s0 +; CHECK-NEXT: roriw a4, t3, 19 ; CHECK-NEXT: xor a2, a2, a5 -; CHECK-NEXT: add a0, a0, a3 -; CHECK-NEXT: ld a3, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: add a1, a1, a2 
-; CHECK-NEXT: add s3, a0, a4 -; CHECK-NEXT: addiw a3, a3, 16 -; CHECK-NEXT: add ra, a1, a0 +; CHECK-NEXT: add a6, a6, t0 +; CHECK-NEXT: xor a5, t4, t5 +; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: xor a1, a1, a4 +; CHECK-NEXT: srliw a4, t3, 10 +; CHECK-NEXT: add a2, a2, t6 +; CHECK-NEXT: add t0, a6, t1 +; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: xor a1, a1, a4 +; CHECK-NEXT: add a2, a2, s4 +; CHECK-NEXT: add t6, a3, a6 +; CHECK-NEXT: roriw a3, t0, 6 +; CHECK-NEXT: lw a4, 0(s10) +; CHECK-NEXT: add a6, a2, a1 +; CHECK-NEXT: roriw a1, t0, 11 +; CHECK-NEXT: andn a0, a0, t0 +; CHECK-NEXT: and a2, a7, t0 +; CHECK-NEXT: add a4, a4, a6 +; CHECK-NEXT: roriw a5, t6, 2 +; CHECK-NEXT: xor a1, a1, a3 +; CHECK-NEXT: roriw a3, t6, 13 +; CHECK-NEXT: or a0, a0, a2 +; CHECK-NEXT: xor a2, t6, s2 +; CHECK-NEXT: add a4, a4, s11 +; CHECK-NEXT: roriw s1, t0, 25 +; CHECK-NEXT: xor a3, a3, a5 +; CHECK-NEXT: roriw a5, t6, 22 +; CHECK-NEXT: and a2, a2, s0 +; CHECK-NEXT: and s0, t6, s2 +; CHECK-NEXT: xor a1, a1, s1 +; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: xor a3, a3, a5 +; CHECK-NEXT: xor a2, a2, s0 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: ld a3, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t5, a0, ra +; CHECK-NEXT: addiw a1, a1, 16 +; CHECK-NEXT: add ra, a2, a0 ; CHECK-NEXT: addi s10, s10, 64 ; CHECK-NEXT: li a0, 48 -; CHECK-NEXT: bltu a3, a0, .LBB77_1 +; CHECK-NEXT: bltu a1, a0, .LBB77_1 ; CHECK-NEXT: # %bb.2: # %end -; CHECK-NEXT: ld ra, 264(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 256(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 248(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 240(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 232(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 224(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s5, 216(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s6, 208(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s7, 200(sp) # 8-byte Folded Reload -; 
CHECK-NEXT: ld s8, 192(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 184(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s10, 176(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s11, 168(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 272 +; CHECK-NEXT: ld ra, 280(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 272(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 264(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 256(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 232(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 224(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 216(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s8, 208(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s9, 200(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s10, 192(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s11, 184(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 288 ; CHECK-NEXT: ret entry: %data = alloca [16 x i32], align 4