diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -218,6 +218,10 @@
   finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
                     SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
 
+  bool shouldReduceRegisterPressure(
+      const MachineBasicBlock *MBB,
+      const RegisterClassInfo *RegClassInfo) const override;
+
   void genAlternativeCodeSequence(
       MachineInstr &Root, MachineCombinerPattern Pattern,
       SmallVectorImpl<MachineInstr *> &InsInstrs,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/MC/MCInstBuilder.h"
@@ -34,6 +35,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "riscv-instr-info"
+
 #define GEN_CHECK_COMPRESS_INSTR
 #include "RISCVGenCompressInstEmitter.inc"
@@ -1368,6 +1371,46 @@
   }
 }
 
+bool RISCVInstrInfo::shouldReduceRegisterPressure(
+    const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+  const MachineFunction *MF = MBB->getParent();
+  const MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+  auto GetMBBPressure =
+      [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
+    RegionPressure Pressure;
+    RegPressureTracker RPTracker(Pressure);
+
+    // Initialize the register pressure tracker.
+    RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
+                   /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+    for (const auto &MI : reverse(*MBB)) {
+      if (MI.isDebugOrPseudoInstr())
+        continue;
+      RegisterOperands RegOpers;
+      RegOpers.collect(MI, *TRI, *MRI, false, false);
+      RPTracker.recedeSkipDebugValues();
+      assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+      RPTracker.recede(RegOpers);
+    }
+
+    // Close the RPTracker to finalize live ins.
+    RPTracker.closeRegion();
+
+    return RPTracker.getPressure().MaxSetPressure;
+  };
+
+  unsigned GPRLimit = TRI->getRegPressureSetLimit(
+      *MBB->getParent(), RISCV::RegisterPressureSets::GPR);
+
+  LLVM_DEBUG(dbgs() << "Register Pressure: "
+                    << GetMBBPressure(MBB)[RISCV::RegisterPressureSets::GPR]
+                    << "::" << GPRLimit << "\n");
+  return GetMBBPressure(MBB)[RISCV::RegisterPressureSets::GPR] > GPRLimit;
+}
+
 static bool isFADD(unsigned Opc) {
   switch (Opc) {
   default:
@@ -1527,42 +1570,28 @@
   return RISCV::hasEqualFRM(Root, *MI);
 }
 
-static bool
-getFPFusedMultiplyPatterns(MachineInstr &Root,
-                           SmallVectorImpl<MachineCombinerPattern> &Patterns,
-                           bool DoRegPressureReduce) {
-  unsigned Opc = Root.getOpcode();
-  bool IsFAdd = isFADD(Opc);
-  if (!IsFAdd && !isFSUB(Opc))
-    return false;
-  bool Added = false;
-  if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
-                                DoRegPressureReduce)) {
-    Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX
-                              : MachineCombinerPattern::FMSUB);
-    Added = true;
-  }
-  if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
-                                DoRegPressureReduce)) {
-    Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA
-                              : MachineCombinerPattern::FNMSUB);
-    Added = true;
-  }
-  return Added;
-}
-
-static bool getFPPatterns(MachineInstr &Root,
-                          SmallVectorImpl<MachineCombinerPattern> &Patterns,
-                          bool DoRegPressureReduce) {
-  return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
-}
-
 bool RISCVInstrInfo::getMachineCombinerPatterns(
     MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
     bool DoRegPressureReduce) const {
+  unsigned Opc = Root.getOpcode();
+  bool IsFAdd = isFADD(Opc);
 
-  if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
-    return true;
+  if (IsFAdd || isFSUB(Opc)) {
+    if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
+                                  DoRegPressureReduce)) {
+      Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX
+                                : MachineCombinerPattern::FMSUB);
+      return true;
+    } else if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
+                                         DoRegPressureReduce)) {
+      Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA
+                                : MachineCombinerPattern::FNMSUB);
+      return true;
+    }
+  }
+
+  if (DoRegPressureReduce)
+    return false;
 
   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                      DoRegPressureReduce);
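Reviewer context, not part of the patch: in the target-independent driver (llvm/lib/CodeGen/MachineCombiner.cpp), shouldReduceRegisterPressure() is queried once per basic block and its result is forwarded to getMachineCombinerPatterns() as DoRegPressureReduce. With this change, a RISC-V block whose estimated GPR pressure exceeds the set limit still gets the FMA patterns considered, but no longer falls through to the generic reassociation patterns, which trade an extra live temporary for a shorter critical path. A minimal sketch of the kind of chain those generic patterns rewrite (hypothetical function, not taken from the test file below):

; Sketch only. Reassociation would rewrite the serial chain into
; (%a0 + %a1) + (%a2 + %a3), shortening the dependence chain but
; keeping one extra intermediate value live; when the new hook
; reports high GPR pressure, that rewrite is now skipped.
define i32 @reassoc_sketch(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
  %t0 = add i32 %a0, %a1
  %t1 = add i32 %t0, %a2
  %t2 = add i32 %t1, %a3
  ret i32 %t2
}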
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1070,6 +1070,1291 @@
   ret double %t2
 }
 
+; Since MachineCombiner patterns may lead to an increase in register pressure
+; resulting in spills, test that MachineCombiner doesn't kick in if there are
+; more live registers in the MBB than available on the target machine.
+define void @test_reg_pressure(ptr %state, ptr %input, ptr %k) {
+; CHECK-LABEL: test_reg_pressure:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -288
+; CHECK-NEXT:    .cfi_def_cfa_offset 288
+; CHECK-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    .cfi_offset s1, -24
+; CHECK-NEXT:    .cfi_offset s2, -32
+; CHECK-NEXT:    .cfi_offset s3, -40
+; CHECK-NEXT:    .cfi_offset s4, -48
+; CHECK-NEXT:    .cfi_offset s5, -56
+; CHECK-NEXT:    .cfi_offset s6, -64
+; CHECK-NEXT:    .cfi_offset s7, -72
+; CHECK-NEXT:    .cfi_offset s8, -80
+; CHECK-NEXT:    .cfi_offset s9, -88
+; CHECK-NEXT:    .cfi_offset s10, -96
+; CHECK-NEXT:    .cfi_offset s11, -104
+; CHECK-NEXT:    lw a7, 148(sp)
+; CHECK-NEXT:    lw s8, 180(sp)
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    add t0, a7, a7
+; CHECK-NEXT:    addi s10, a2, 124
+; CHECK-NEXT:    sd s8, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s8, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv s5, s8
+; CHECK-NEXT:    mv t3, s8
+; CHECK-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv s6, s8
+; CHECK-NEXT:    mv t2, s8
+; CHECK-NEXT:    mv s7, s8
+; CHECK-NEXT:    mv a3, s8
+; CHECK-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv a6, s8
+; CHECK-NEXT:    sd s8, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s8,
112(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv t5, t0 +; CHECK-NEXT: mv s2, t0 +; CHECK-NEXT: mv t6, t0 +; CHECK-NEXT: mv ra, t0 +; CHECK-NEXT: .LBB76_1: # %do.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sd t3, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a6, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw s11, t5, 11 +; CHECK-NEXT: roriw s9, t5, 25 +; CHECK-NEXT: roriw a0, t5, 6 +; CHECK-NEXT: add s9, s9, s11 +; CHECK-NEXT: add s11, s11, a0 +; CHECK-NEXT: add a5, s9, a7 +; CHECK-NEXT: add a5, a5, a0 +; CHECK-NEXT: roriw s1, s11, 2 +; CHECK-NEXT: roriw a4, s11, 13 +; CHECK-NEXT: roriw a6, s9, 17 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: roriw a5, s9, 19 +; CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: xor s1, s11, ra +; CHECK-NEXT: roriw a7, s11, 22 +; CHECK-NEXT: and t4, s11, ra +; CHECK-NEXT: xor a5, a6, a5 +; CHECK-NEXT: srliw s0, s9, 10 +; CHECK-NEXT: and s1, s1, t6 +; CHECK-NEXT: roriw a1, t2, 7 +; CHECK-NEXT: add a0, a0, s11 +; CHECK-NEXT: roriw a2, t2, 18 +; CHECK-NEXT: xor a5, a5, s0 +; CHECK-NEXT: xor a4, a4, a7 +; CHECK-NEXT: xor s0, s1, t4 +; CHECK-NEXT: add t1, a0, s2 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: srliw a2, t2, 3 +; CHECK-NEXT: add a5, a5, a3 +; CHECK-NEXT: add a4, a4, s0 +; CHECK-NEXT: roriw a3, t1, 6 +; CHECK-NEXT: lw s0, -52(s10) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sd s7, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: add a2, a5, s7 +; CHECK-NEXT: add a6, a4, a0 +; CHECK-NEXT: roriw a0, t1, 11 +; CHECK-NEXT: andn a4, t5, t1 +; CHECK-NEXT: and a5, s9, t1 +; CHECK-NEXT: add s1, a2, a1 +; CHECK-NEXT: add s0, s0, t0 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: roriw a1, t1, 25 +; CHECK-NEXT: or a4, a4, a5 +; CHECK-NEXT: roriw a2, a6, 2 +; CHECK-NEXT: add s0, s0, s1 +; CHECK-NEXT: roriw a3, a6, 13 +; CHECK-NEXT: xor a7, a0, a1 +; CHECK-NEXT: xor a1, a6, s11 +; CHECK-NEXT: add a4, a4, s0 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: roriw a3, a6, 22 +; CHECK-NEXT: and a5, a6, s11 +; CHECK-NEXT: and a1, a1, ra +; CHECK-NEXT: roriw s0, s11, 17 +; CHECK-NEXT: roriw a0, s11, 19 +; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: xor a3, a3, a2 +; CHECK-NEXT: xor a1, a1, a5 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a5, s4, 7 +; CHECK-NEXT: srliw s0, s11, 10 +; CHECK-NEXT: roriw a2, s4, 18 +; CHECK-NEXT: add t6, t6, a4 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: xor a2, a2, a5 +; CHECK-NEXT: srliw a3, s4, 3 +; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: add s6, s6, t2 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: roriw a4, t6, 6 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: add a0, a0, s6 +; CHECK-NEXT: roriw a3, t6, 11 +; CHECK-NEXT: lw a5, -48(s10) +; CHECK-NEXT: andn a7, s9, t6 +; CHECK-NEXT: and s0, t1, t6 +; CHECK-NEXT: add t0, a0, a2 +; CHECK-NEXT: roriw a0, a1, 2 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: roriw a2, a1, 13 +; CHECK-NEXT: xor t4, a4, a3 +; CHECK-NEXT: xor a4, a1, a6 +; CHECK-NEXT: or a7, s0, a7 +; CHECK-NEXT: roriw t5, t6, 25 +; CHECK-NEXT: add a5, a5, t0 +; CHECK-NEXT: xor s2, a0, a2 +; CHECK-NEXT: roriw a2, a1, 22 +; CHECK-NEXT: and a4, a4, s11 +; CHECK-NEXT: and s0, a1, a6 +; CHECK-NEXT: sd s1, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw a3, s1, 17 +; CHECK-NEXT: roriw a0, s1, 19 +; CHECK-NEXT: xor t4, t4, t5 +; CHECK-NEXT: add a5, a5, a7 +; CHECK-NEXT: xor a7, s2, a2 +; CHECK-NEXT: xor a4, a4, s0 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: roriw a3, 
s5, 7 +; CHECK-NEXT: srliw s0, s1, 10 +; CHECK-NEXT: roriw a2, s5, 18 +; CHECK-NEXT: add a5, a5, t4 +; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: xor s0, s0, a0 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: srliw a3, s5, 3 +; CHECK-NEXT: add s4, s4, t3 +; CHECK-NEXT: add a7, a5, ra +; CHECK-NEXT: add t4, a4, a5 +; CHECK-NEXT: xor a2, a2, a3 +; CHECK-NEXT: add s0, s0, s4 +; CHECK-NEXT: roriw a3, a7, 6 +; CHECK-NEXT: lw a5, -44(s10) +; CHECK-NEXT: roriw a0, a7, 11 +; CHECK-NEXT: andn t5, t1, a7 +; CHECK-NEXT: and a4, t6, a7 +; CHECK-NEXT: add s7, s0, a2 +; CHECK-NEXT: sd s7, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw s2, a7, 25 +; CHECK-NEXT: add a5, a5, s9 +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: xor ra, a3, a0 +; CHECK-NEXT: roriw a3, t4, 2 +; CHECK-NEXT: or t5, a4, t5 +; CHECK-NEXT: roriw s0, t4, 13 +; CHECK-NEXT: add a5, a5, s7 +; CHECK-NEXT: sd t0, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: roriw a2, t0, 17 +; CHECK-NEXT: roriw a0, t0, 19 +; CHECK-NEXT: xor a4, t4, a1 +; CHECK-NEXT: xor s2, ra, s2 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: xor ra, a3, s0 +; CHECK-NEXT: xor t5, a2, a0 +; CHECK-NEXT: roriw a2, t4, 22 +; CHECK-NEXT: and a4, a4, a6 +; CHECK-NEXT: and s0, t4, a1 +; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a0, s1, 7 +; CHECK-NEXT: srliw a3, t0, 10 +; CHECK-NEXT: roriw s3, s1, 18 +; CHECK-NEXT: add a5, a5, s2 +; CHECK-NEXT: xor a2, ra, a2 +; CHECK-NEXT: xor a4, a4, s0 +; CHECK-NEXT: xor t0, t5, a3 +; CHECK-NEXT: xor a0, a0, s3 +; CHECK-NEXT: srliw s0, s1, 3 +; CHECK-NEXT: ld t2, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t2, t2, s5 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: add a2, a5, s11 +; CHECK-NEXT: xor a0, a0, s0 +; CHECK-NEXT: add t2, t2, t0 +; CHECK-NEXT: add t5, a4, a5 +; CHECK-NEXT: roriw a4, a2, 6 +; CHECK-NEXT: lw a5, -40(s10) +; CHECK-NEXT: roriw s0, a2, 11 +; CHECK-NEXT: add s5, t2, a0 +; CHECK-NEXT: andn a0, t6, a2 +; CHECK-NEXT: and a3, a7, a2 +; CHECK-NEXT: add a5, a5, t1 +; CHECK-NEXT: roriw s2, a2, 25 +; CHECK-NEXT: xor s3, a4, s0 +; CHECK-NEXT: roriw ra, t5, 2 +; CHECK-NEXT: or a0, a0, a3 +; CHECK-NEXT: roriw t0, t5, 13 +; CHECK-NEXT: add a5, a5, s5 +; CHECK-NEXT: roriw a3, s7, 17 +; CHECK-NEXT: roriw a4, s7, 19 +; CHECK-NEXT: xor s0, t5, t4 +; CHECK-NEXT: xor s2, s3, s2 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: xor ra, ra, t0 +; CHECK-NEXT: xor s3, a3, a4 +; CHECK-NEXT: and t1, s0, a1 +; CHECK-NEXT: roriw s0, t5, 22 +; CHECK-NEXT: and s4, t5, t4 +; CHECK-NEXT: ld a3, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a3, 7 +; CHECK-NEXT: srliw a5, s7, 10 +; CHECK-NEXT: roriw t0, a3, 18 +; CHECK-NEXT: add a0, a0, s2 +; CHECK-NEXT: xor t2, ra, s0 +; CHECK-NEXT: xor s0, t1, s4 +; CHECK-NEXT: xor s2, s3, a5 +; CHECK-NEXT: xor a4, a4, t0 +; CHECK-NEXT: ld t1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t1, t1, s1 +; CHECK-NEXT: srliw s1, a3, 3 +; CHECK-NEXT: add a5, t2, s0 +; CHECK-NEXT: add ra, a0, a6 +; CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: add t1, t1, s2 +; CHECK-NEXT: add t3, a5, a0 +; CHECK-NEXT: roriw a0, ra, 6 +; CHECK-NEXT: lw s1, -36(s10) +; CHECK-NEXT: roriw s0, ra, 11 +; CHECK-NEXT: add s6, t1, a4 +; CHECK-NEXT: andn t1, a7, ra +; CHECK-NEXT: and a4, a2, ra +; CHECK-NEXT: add t6, t6, s1 +; CHECK-NEXT: roriw a6, ra, 25 +; CHECK-NEXT: xor t0, a0, s0 +; CHECK-NEXT: roriw s2, t3, 2 +; CHECK-NEXT: or s3, a4, t1 +; CHECK-NEXT: roriw a4, t3, 13 +; CHECK-NEXT: add t6, t6, s6 +; CHECK-NEXT: roriw s1, s5, 17 +; CHECK-NEXT: xor a0, t3, t5 +; CHECK-NEXT: roriw s0, s5, 19 +; 
CHECK-NEXT: xor a6, t0, a6 +; CHECK-NEXT: add t6, t6, s3 +; CHECK-NEXT: xor s2, s2, a4 +; CHECK-NEXT: xor t0, s1, s0 +; CHECK-NEXT: and s3, a0, t4 +; CHECK-NEXT: roriw s1, t3, 22 +; CHECK-NEXT: and t1, t3, t5 +; CHECK-NEXT: srliw s0, s5, 10 +; CHECK-NEXT: roriw a4, s8, 7 +; CHECK-NEXT: roriw a0, s8, 18 +; CHECK-NEXT: add a6, a6, t6 +; CHECK-NEXT: xor t2, s2, s1 +; CHECK-NEXT: xor s1, s3, t1 +; CHECK-NEXT: xor s0, t0, s0 +; CHECK-NEXT: xor a0, a0, a4 +; CHECK-NEXT: srliw a4, s8, 3 +; CHECK-NEXT: add t0, a6, a1 +; CHECK-NEXT: add a1, a3, s9 +; CHECK-NEXT: add a3, t2, s1 +; CHECK-NEXT: xor a0, a0, a4 +; CHECK-NEXT: roriw a4, t0, 6 +; CHECK-NEXT: add a1, a1, s0 +; CHECK-NEXT: roriw s0, t0, 11 +; CHECK-NEXT: add s3, a3, a6 +; CHECK-NEXT: lw a3, -32(s10) +; CHECK-NEXT: add s9, a1, a0 +; CHECK-NEXT: xor a6, a4, s0 +; CHECK-NEXT: roriw t6, t0, 25 +; CHECK-NEXT: andn s2, a2, t0 +; CHECK-NEXT: and s0, ra, t0 +; CHECK-NEXT: add a3, a3, s9 +; CHECK-NEXT: ld t1, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, t1, 7 +; CHECK-NEXT: roriw a0, t1, 18 +; CHECK-NEXT: xor a6, a6, t6 +; CHECK-NEXT: or s0, s0, s2 +; CHECK-NEXT: add a3, a3, a7 +; CHECK-NEXT: roriw s1, s3, 2 +; CHECK-NEXT: xor a7, a4, a0 +; CHECK-NEXT: roriw a4, s3, 13 +; CHECK-NEXT: xor a1, s3, t3 +; CHECK-NEXT: srliw a0, t1, 3 +; CHECK-NEXT: add a3, a3, s0 +; CHECK-NEXT: roriw s0, s6, 17 +; CHECK-NEXT: xor t6, s1, a4 +; CHECK-NEXT: roriw s1, s6, 19 +; CHECK-NEXT: xor a0, a7, a0 +; CHECK-NEXT: and a7, a1, t5 +; CHECK-NEXT: roriw a4, s3, 22 +; CHECK-NEXT: and a1, s3, t3 +; CHECK-NEXT: add a3, a3, a6 +; CHECK-NEXT: xor s0, s0, s1 +; CHECK-NEXT: add a0, a0, s11 +; CHECK-NEXT: srliw s1, s6, 10 +; CHECK-NEXT: xor a4, t6, a4 +; CHECK-NEXT: xor a1, a7, a1 +; CHECK-NEXT: add t4, t4, a3 +; CHECK-NEXT: xor s0, s0, s1 +; CHECK-NEXT: add a0, a0, s8 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: roriw s1, t4, 6 +; CHECK-NEXT: lw a4, -28(s10) +; CHECK-NEXT: add s4, a0, s0 +; CHECK-NEXT: add s11, a1, a3 +; CHECK-NEXT: roriw a0, t4, 11 +; CHECK-NEXT: andn a3, ra, t4 +; CHECK-NEXT: and s0, t0, t4 +; CHECK-NEXT: add a4, a4, s4 +; CHECK-NEXT: roriw a6, t4, 25 +; CHECK-NEXT: xor a7, s1, a0 +; CHECK-NEXT: roriw t6, s11, 2 +; CHECK-NEXT: or s2, s0, a3 +; CHECK-NEXT: roriw s0, s11, 13 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: ld a5, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a5, 7 +; CHECK-NEXT: roriw a0, a5, 18 +; CHECK-NEXT: xor a7, a7, a6 +; CHECK-NEXT: xor a3, s11, s3 +; CHECK-NEXT: add a2, a2, s2 +; CHECK-NEXT: xor a6, t6, s0 +; CHECK-NEXT: xor t6, a4, a0 +; CHECK-NEXT: roriw a4, s11, 22 +; CHECK-NEXT: and a3, a3, t3 +; CHECK-NEXT: srliw a1, a5, 3 +; CHECK-NEXT: and s0, s11, s3 +; CHECK-NEXT: roriw s1, s9, 17 +; CHECK-NEXT: add a2, a2, a7 +; CHECK-NEXT: roriw a0, s9, 19 +; CHECK-NEXT: xor a1, t6, a1 +; CHECK-NEXT: xor a4, a6, a4 +; CHECK-NEXT: xor a3, a3, s0 +; CHECK-NEXT: add s8, a2, t5 +; CHECK-NEXT: xor a0, a0, s1 +; CHECK-NEXT: srliw s1, s9, 10 +; CHECK-NEXT: add a1, a1, t1 +; CHECK-NEXT: add a3, a3, a4 +; CHECK-NEXT: roriw a4, s8, 6 +; CHECK-NEXT: xor s1, s1, a0 +; CHECK-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: roriw a0, s8, 11 +; CHECK-NEXT: add t6, a3, a2 +; CHECK-NEXT: lw a2, -24(s10) +; CHECK-NEXT: add t1, a1, s1 +; CHECK-NEXT: xor a6, a4, a0 +; CHECK-NEXT: roriw a7, s8, 25 +; CHECK-NEXT: andn t5, t0, s8 +; CHECK-NEXT: and s1, t4, s8 +; CHECK-NEXT: add a2, a2, t1 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a0, s0, 7 +; CHECK-NEXT: roriw a1, s0, 18 +; 
CHECK-NEXT: xor a6, a6, a7 +; CHECK-NEXT: or s1, s1, t5 +; CHECK-NEXT: add a2, a2, ra +; CHECK-NEXT: roriw a3, t6, 2 +; CHECK-NEXT: xor a7, a0, a1 +; CHECK-NEXT: roriw a1, t6, 13 +; CHECK-NEXT: xor a4, t6, s11 +; CHECK-NEXT: srliw a0, s0, 3 +; CHECK-NEXT: add s2, a2, s1 +; CHECK-NEXT: roriw s1, s4, 17 +; CHECK-NEXT: xor t5, a3, a1 +; CHECK-NEXT: roriw a3, s4, 19 +; CHECK-NEXT: xor a0, a7, a0 +; CHECK-NEXT: and a4, a4, s3 +; CHECK-NEXT: roriw a1, t6, 22 +; CHECK-NEXT: and a2, t6, s11 +; CHECK-NEXT: add a6, a6, s2 +; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: srliw s1, s4, 10 +; CHECK-NEXT: xor a1, t5, a1 +; CHECK-NEXT: xor a2, a2, a4 +; CHECK-NEXT: add a7, a6, t3 +; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: ld a4, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: roriw a2, a7, 6 +; CHECK-NEXT: lw a4, -20(s10) +; CHECK-NEXT: add t3, a0, a3 +; CHECK-NEXT: add a1, a1, a6 +; CHECK-NEXT: roriw a0, a7, 11 +; CHECK-NEXT: andn a3, t4, a7 +; CHECK-NEXT: and a5, s8, a7 +; CHECK-NEXT: add a4, a4, t3 +; CHECK-NEXT: roriw a6, a7, 25 +; CHECK-NEXT: xor t5, a2, a0 +; CHECK-NEXT: roriw s2, a1, 2 +; CHECK-NEXT: or ra, a5, a3 +; CHECK-NEXT: roriw a5, a1, 13 +; CHECK-NEXT: add a4, a4, t0 +; CHECK-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, a2, 7 +; CHECK-NEXT: roriw a0, a2, 18 +; CHECK-NEXT: xor t0, t5, a6 +; CHECK-NEXT: xor a3, a1, t6 +; CHECK-NEXT: add a4, a4, ra +; CHECK-NEXT: xor a6, s2, a5 +; CHECK-NEXT: xor t5, s1, a0 +; CHECK-NEXT: roriw s1, a1, 22 +; CHECK-NEXT: and s2, a3, s11 +; CHECK-NEXT: srliw a5, a2, 3 +; CHECK-NEXT: mv ra, a2 +; CHECK-NEXT: and a2, a1, t6 +; CHECK-NEXT: roriw a0, t1, 17 +; CHECK-NEXT: add a4, a4, t0 +; CHECK-NEXT: roriw a3, t1, 19 +; CHECK-NEXT: xor a5, t5, a5 +; CHECK-NEXT: xor s1, a6, s1 +; CHECK-NEXT: xor a2, s2, a2 +; CHECK-NEXT: add s3, s3, a4 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: sd t1, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: srliw a3, t1, 10 +; CHECK-NEXT: add a5, a5, s0 +; CHECK-NEXT: add a2, a2, s1 +; CHECK-NEXT: roriw s1, s3, 6 +; CHECK-NEXT: xor a0, a0, a3 +; CHECK-NEXT: add a5, a5, s7 +; CHECK-NEXT: roriw a3, s3, 11 +; CHECK-NEXT: add t1, a2, a4 +; CHECK-NEXT: lw a2, -16(s10) +; CHECK-NEXT: add s7, a5, a0 +; CHECK-NEXT: xor a6, s1, a3 +; CHECK-NEXT: roriw a0, s3, 25 +; CHECK-NEXT: andn a4, s8, s3 +; CHECK-NEXT: and a5, a7, s3 +; CHECK-NEXT: add a2, a2, s7 +; CHECK-NEXT: ld t2, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, t2, 7 +; CHECK-NEXT: roriw a3, t2, 18 +; CHECK-NEXT: xor a6, a6, a0 +; CHECK-NEXT: or a4, a4, a5 +; CHECK-NEXT: add a2, a2, t4 +; CHECK-NEXT: roriw a5, t1, 2 +; CHECK-NEXT: xor t0, s1, a3 +; CHECK-NEXT: roriw s1, t1, 13 +; CHECK-NEXT: xor a0, t1, a1 +; CHECK-NEXT: srliw a3, t2, 3 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: roriw a4, t3, 17 +; CHECK-NEXT: xor t4, a5, s1 +; CHECK-NEXT: roriw s1, t3, 19 +; CHECK-NEXT: xor a3, t0, a3 +; CHECK-NEXT: and t0, a0, t6 +; CHECK-NEXT: roriw a5, t1, 22 +; CHECK-NEXT: and a0, t1, a1 +; CHECK-NEXT: add a2, a2, a6 +; CHECK-NEXT: xor s1, s1, a4 +; CHECK-NEXT: add a3, a3, ra +; CHECK-NEXT: sd t3, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: srliw a4, t3, 10 +; CHECK-NEXT: xor a5, t4, a5 +; CHECK-NEXT: xor a0, t0, a0 +; CHECK-NEXT: add s11, s11, a2 +; CHECK-NEXT: xor a4, a4, s1 +; CHECK-NEXT: add a3, a3, s5 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: roriw a5, s11, 6 +; CHECK-NEXT: lw s1, -12(s10) +; CHECK-NEXT: add s2, a3, a4 +; CHECK-NEXT: add ra, a0, a2 +; CHECK-NEXT: roriw a0, s11, 11 
+; CHECK-NEXT: andn a3, a7, s11 +; CHECK-NEXT: and a4, s3, s11 +; CHECK-NEXT: add s1, s1, s2 +; CHECK-NEXT: roriw a6, s11, 25 +; CHECK-NEXT: xor t0, a5, a0 +; CHECK-NEXT: roriw t4, ra, 2 +; CHECK-NEXT: or t5, a4, a3 +; CHECK-NEXT: roriw a4, ra, 13 +; CHECK-NEXT: add s0, s1, s8 +; CHECK-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw s1, s8, 7 +; CHECK-NEXT: roriw a0, s8, 18 +; CHECK-NEXT: xor a5, t0, a6 +; CHECK-NEXT: xor a3, ra, t1 +; CHECK-NEXT: add t5, t5, s0 +; CHECK-NEXT: xor a6, t4, a4 +; CHECK-NEXT: xor a0, a0, s1 +; CHECK-NEXT: roriw s1, ra, 22 +; CHECK-NEXT: and t0, a3, a1 +; CHECK-NEXT: srliw s0, s8, 3 +; CHECK-NEXT: and a2, ra, t1 +; CHECK-NEXT: roriw a4, s7, 17 +; CHECK-NEXT: add a5, a5, t5 +; CHECK-NEXT: roriw a3, s7, 19 +; CHECK-NEXT: xor s0, s0, a0 +; CHECK-NEXT: xor s1, a6, s1 +; CHECK-NEXT: xor a2, t0, a2 +; CHECK-NEXT: add a0, a5, t6 +; CHECK-NEXT: xor a3, a3, a4 +; CHECK-NEXT: srliw a4, s7, 10 +; CHECK-NEXT: add t3, t2, s0 +; CHECK-NEXT: add a2, a2, s1 +; CHECK-NEXT: roriw s1, a0, 6 +; CHECK-NEXT: xor a3, a3, a4 +; CHECK-NEXT: sd s6, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: add t3, t3, s6 +; CHECK-NEXT: mv s6, s2 +; CHECK-NEXT: roriw a4, a0, 11 +; CHECK-NEXT: add s0, a2, a5 +; CHECK-NEXT: lw a2, -8(s10) +; CHECK-NEXT: add t3, t3, a3 +; CHECK-NEXT: xor a6, s1, a4 +; CHECK-NEXT: roriw t0, a0, 25 +; CHECK-NEXT: andn t4, s3, a0 +; CHECK-NEXT: and s1, s11, a0 +; CHECK-NEXT: add a2, a2, t3 +; CHECK-NEXT: ld a5, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a4, a5, 7 +; CHECK-NEXT: roriw a3, a5, 18 +; CHECK-NEXT: xor a6, a6, t0 +; CHECK-NEXT: or s1, s1, t4 +; CHECK-NEXT: add a2, a2, a7 +; CHECK-NEXT: roriw t4, s0, 2 +; CHECK-NEXT: xor a7, a4, a3 +; CHECK-NEXT: roriw a4, s0, 13 +; CHECK-NEXT: xor t0, s0, ra +; CHECK-NEXT: srliw a3, a5, 3 +; CHECK-NEXT: mv t6, a5 +; CHECK-NEXT: add t5, a2, s1 +; CHECK-NEXT: roriw s1, s2, 17 +; CHECK-NEXT: xor t4, t4, a4 +; CHECK-NEXT: roriw a5, s2, 19 +; CHECK-NEXT: xor a3, a7, a3 +; CHECK-NEXT: and a7, t0, t1 +; CHECK-NEXT: roriw a2, s0, 22 +; CHECK-NEXT: and a4, s0, ra +; CHECK-NEXT: add a6, a6, t5 +; CHECK-NEXT: xor s1, s1, a5 +; CHECK-NEXT: add a3, a3, s8 +; CHECK-NEXT: mv s8, s4 +; CHECK-NEXT: srliw a5, s2, 10 +; CHECK-NEXT: xor a2, t4, a2 +; CHECK-NEXT: xor a4, a7, a4 +; CHECK-NEXT: add a7, a6, a1 +; CHECK-NEXT: xor a5, a5, s1 +; CHECK-NEXT: sd s9, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: add a3, a3, s9 +; CHECK-NEXT: ld t2, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: roriw a4, a7, 6 +; CHECK-NEXT: lw s1, -4(s10) +; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: add s2, a2, a6 +; CHECK-NEXT: roriw a2, a7, 11 +; CHECK-NEXT: andn a5, s11, a7 +; CHECK-NEXT: and a1, a0, a7 +; CHECK-NEXT: sd a3, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: add s1, s1, a3 +; CHECK-NEXT: roriw a6, a7, 25 +; CHECK-NEXT: xor t0, a4, a2 +; CHECK-NEXT: roriw t4, s2, 2 +; CHECK-NEXT: or a1, a1, a5 +; CHECK-NEXT: roriw t5, s2, 13 +; CHECK-NEXT: add s1, s1, s3 +; CHECK-NEXT: ld a3, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: roriw a2, a3, 7 +; CHECK-NEXT: roriw a4, a3, 18 +; CHECK-NEXT: mv a5, a3 +; CHECK-NEXT: xor a6, t0, a6 +; CHECK-NEXT: add t0, s1, a1 +; CHECK-NEXT: xor a3, s2, s0 +; CHECK-NEXT: xor t4, t4, t5 +; CHECK-NEXT: xor a2, a2, a4 +; CHECK-NEXT: roriw t5, s2, 22 +; CHECK-NEXT: srliw a5, a5, 3 +; CHECK-NEXT: and a3, a3, ra +; CHECK-NEXT: roriw a1, t3, 17 +; CHECK-NEXT: and s1, s2, s0 +; CHECK-NEXT: roriw a4, t3, 19 +; CHECK-NEXT: xor a2, a2, a5 +; CHECK-NEXT: add a6, a6, t0 +; CHECK-NEXT: xor a5, t4, t5 
+; CHECK-NEXT: xor a3, a3, s1 +; CHECK-NEXT: xor a1, a1, a4 +; CHECK-NEXT: srliw a4, t3, 10 +; CHECK-NEXT: add a2, a2, t6 +; CHECK-NEXT: add t0, a6, t1 +; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: xor a1, a1, a4 +; CHECK-NEXT: add a2, a2, s4 +; CHECK-NEXT: add t6, a3, a6 +; CHECK-NEXT: roriw a3, t0, 6 +; CHECK-NEXT: lw a4, 0(s10) +; CHECK-NEXT: add a6, a2, a1 +; CHECK-NEXT: roriw a1, t0, 11 +; CHECK-NEXT: andn a0, a0, t0 +; CHECK-NEXT: and a2, a7, t0 +; CHECK-NEXT: add a4, a4, a6 +; CHECK-NEXT: roriw a5, t6, 2 +; CHECK-NEXT: xor a1, a1, a3 +; CHECK-NEXT: roriw a3, t6, 13 +; CHECK-NEXT: or a0, a0, a2 +; CHECK-NEXT: xor a2, t6, s2 +; CHECK-NEXT: add a4, a4, s11 +; CHECK-NEXT: roriw s1, t0, 25 +; CHECK-NEXT: xor a3, a3, a5 +; CHECK-NEXT: roriw a5, t6, 22 +; CHECK-NEXT: and a2, a2, s0 +; CHECK-NEXT: and s0, t6, s2 +; CHECK-NEXT: xor a1, a1, s1 +; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: xor a3, a3, a5 +; CHECK-NEXT: xor a2, a2, s0 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: ld a3, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: add t5, a0, ra +; CHECK-NEXT: addiw a1, a1, 16 +; CHECK-NEXT: add ra, a2, a0 +; CHECK-NEXT: addi s10, s10, 64 +; CHECK-NEXT: li a0, 48 +; CHECK-NEXT: bltu a1, a0, .LBB76_1 +; CHECK-NEXT: # %bb.2: # %end +; CHECK-NEXT: ld ra, 280(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 272(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 264(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 256(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 232(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 224(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 216(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s8, 208(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s9, 200(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s10, 192(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s11, 184(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 288 +; CHECK-NEXT: ret +entry: + %data = alloca [16 x i32], align 4 + %arrayidx11.15 = getelementptr inbounds [16 x i32], ptr %data, i32 0, i32 15 + %arrayidx322 = getelementptr inbounds i32, ptr %data, i32 7 + %load87 = load i32, ptr %arrayidx322 + %add341 = load i32, ptr %arrayidx322 + %add342 = add i32 %add341, %load87 + %add.ptr347 = getelementptr inbounds i32, ptr %k, i32 8 + %add.ptr348 = getelementptr inbounds i32, ptr %data, i32 8 + %uglygep = getelementptr i8, ptr %k, i32 64 + %data.promoted = load i32, ptr %data + %arrayidx899 = load i32, ptr %arrayidx11.15 + br label %do.body + +do.body: + %add9882245 = phi i32 [ %arrayidx899, %entry ], [ %add988, %do.body ] + %add9152244 = phi i32 [ %arrayidx899, %entry ], [ %add915, %do.body ] + %add8422243 = phi i32 [ %arrayidx899, %entry ], [ %add842, %do.body ] + %add7692242 = phi i32 [ %arrayidx899, %entry ], [ %add769, %do.body ] + %add13532241 = phi i32 [ %arrayidx899, %entry ], [ %add1353, %do.body ] + %add6962240 = phi i32 [ %arrayidx899, %entry ], [ %add696, %do.body ] + %add12802239 = phi i32 [ %arrayidx899, %entry ], [ %add1280, %do.body ] + %add6232238 = phi i32 [ %arrayidx899, %entry ], [ %add623, %do.body ] + %add12072237 = phi i32 [ %arrayidx899, %entry ], [ %add1207, %do.body ] + %add5502236 = phi i32 [ %arrayidx899, %entry ], [ %add550, %do.body ] + %add11342235 = phi i32 [ %arrayidx899, %entry ], [ %add1134, %do.body ] + %add14992234 = phi i32 [ %arrayidx899, %entry ], [ %add1499, %do.body ] + %add4772233 = phi i32 [ 
%arrayidx899, %entry ], [ %add477, %do.body ] + %add10612232 = phi i32 [ %arrayidx899, %entry ], [ %add1061, %do.body ] + %add14262231 = phi i32 [ %arrayidx899, %entry ], [ %add1426, %do.body ] + %i.22223 = phi i32 [ 16, %entry ], [ %add1523, %do.body ] + %H.12222 = phi i32 [ %load87, %entry ], [ %add1283, %do.body ] + %G.12221 = phi i32 [ %add341, %entry ], [ %add1356, %do.body ] + %F.12220 = phi i32 [ %add342, %entry ], [ %add1429, %do.body ] + %E.12219 = phi i32 [ %add342, %entry ], [ %add1502, %do.body ] + %D.12218 = phi i32 [ %add342, %entry ], [ %add1300, %do.body ] + %C.12217 = phi i32 [ %add342, %entry ], [ %add1373, %do.body ] + %B.12216 = phi i32 [ %add342, %entry ], [ %add1446, %do.body ] + %A.12215 = phi i32 [ %add342, %entry ], [ %add1519, %do.body ] + %k.addr.12214 = phi ptr [ %uglygep, %entry ], [ %add.ptr1524, %do.body ] + %phi88 = phi i32 [ %data.promoted, %entry ], [ %add404, %do.body ] + %or357 = tail call i32 @llvm.fshl.i32(i32 %E.12219, i32 %E.12219, i32 26) + %or360 = tail call i32 @llvm.fshl.i32(i32 %E.12219, i32 %E.12219, i32 21) + %or364 = tail call i32 @llvm.fshl.i32(i32 %E.12219, i32 %E.12219, i32 7) + %add404 = add i32 %or360, %or364 + %add477 = add i32 %or357, %or360 + %add442 = add i32 %add404, %G.12221 + %add444 = add i32 %add442, %or357 + %add478 = add i32 %add444, %or357 + %add479 = add i32 %add478, %add477 + %add480 = add i32 %add479, %C.12217 + %or483 = tail call i32 @llvm.fshl.i32(i32 %add477, i32 %add477, i32 30) + %or486 = tail call i32 @llvm.fshl.i32(i32 %add477, i32 %add477, i32 19) + %xor487 = xor i32 %or483, %or486 + %or490 = tail call i32 @llvm.fshl.i32(i32 %add477, i32 %add477, i32 10) + %xor491 = xor i32 %xor487, %or490 + %and492 = and i32 %add477, %A.12215 + %xor493 = xor i32 %add477, %A.12215 + %and494 = and i32 %xor493, %B.12216 + %xor495 = xor i32 %and494, %and492 + %add496 = add i32 %xor491, %xor495 + %add497 = add i32 %add496, %add479 + %or503 = tail call i32 @llvm.fshl.i32(i32 %add480, i32 %add480, i32 26) + %or506 = tail call i32 @llvm.fshl.i32(i32 %add480, i32 %add480, i32 21) + %xor507 = xor i32 %or503, %or506 + %or510 = tail call i32 @llvm.fshl.i32(i32 %add480, i32 %add480, i32 7) + %xor511 = xor i32 %xor507, %or510 + %xor512 = xor i32 %add404, %E.12219 + %and513 = and i32 %add480, %xor512 + %xor514 = xor i32 %and513, %E.12219 + %arrayidx516 = getelementptr inbounds i32, ptr %k.addr.12214, i32 2 + %load91 = load i32, ptr %arrayidx516 + %or522 = tail call i32 @llvm.fshl.i32(i32 %add404, i32 %add404, i32 15) + %or527 = tail call i32 @llvm.fshl.i32(i32 %add404, i32 %add404, i32 13) + %xor528 = xor i32 %or522, %or527 + %shr530 = lshr i32 %add404, 10 + %xor531 = xor i32 %xor528, %shr530 + %or538 = tail call i32 @llvm.fshl.i32(i32 %add6232238, i32 %add6232238, i32 25) + %or543 = tail call i32 @llvm.fshl.i32(i32 %add6232238, i32 %add6232238, i32 14) + %xor544 = xor i32 %or538, %or543 + %shr546 = lshr i32 %add6232238, 3 + %xor547 = xor i32 %xor544, %shr546 + %add533 = add i32 %xor531, %add5502236 + %add548 = add i32 %add533, %add12072237 + %add550 = add i32 %add548, %xor547 + %add515 = add i32 %load91, %F.12220 + %add517 = add i32 %add515, %add550 + %add551 = add i32 %add517, %xor514 + %add552 = add i32 %add551, %xor511 + %add553 = add i32 %add552, %B.12216 + %or556 = tail call i32 @llvm.fshl.i32(i32 %add497, i32 %add497, i32 30) + %or559 = tail call i32 @llvm.fshl.i32(i32 %add497, i32 %add497, i32 19) + %xor560 = xor i32 %or556, %or559 + %or563 = tail call i32 @llvm.fshl.i32(i32 %add497, i32 %add497, i32 10) + %xor564 = xor i32 %xor560, 
%or563 + %and565 = and i32 %add497, %add477 + %xor566 = xor i32 %add497, %add477 + %and567 = and i32 %xor566, %A.12215 + %xor568 = xor i32 %and567, %and565 + %add569 = add i32 %xor564, %xor568 + %add570 = add i32 %add569, %add552 + %or576 = tail call i32 @llvm.fshl.i32(i32 %add553, i32 %add553, i32 26) + %or579 = tail call i32 @llvm.fshl.i32(i32 %add553, i32 %add553, i32 21) + %xor580 = xor i32 %or576, %or579 + %or583 = tail call i32 @llvm.fshl.i32(i32 %add553, i32 %add553, i32 7) + %xor584 = xor i32 %xor580, %or583 + %xor585 = xor i32 %add480, %add404 + %and586 = and i32 %add553, %xor585 + %xor587 = xor i32 %and586, %add404 + %arrayidx589 = getelementptr inbounds i32, ptr %k.addr.12214, i32 3 + %load92 = load i32, ptr %arrayidx589 + %or595 = tail call i32 @llvm.fshl.i32(i32 %add477, i32 %add477, i32 15) + %or600 = tail call i32 @llvm.fshl.i32(i32 %add477, i32 %add477, i32 13) + %xor601 = xor i32 %or595, %or600 + %shr603 = lshr i32 %add477, 10 + %xor604 = xor i32 %xor601, %shr603 + %or611 = tail call i32 @llvm.fshl.i32(i32 %add6962240, i32 %add6962240, i32 25) + %or616 = tail call i32 @llvm.fshl.i32(i32 %add6962240, i32 %add6962240, i32 14) + %xor617 = xor i32 %or611, %or616 + %shr619 = lshr i32 %add6962240, 3 + %xor620 = xor i32 %xor617, %shr619 + %add606 = add i32 %add12802239, %add6232238 + %add621 = add i32 %add606, %xor604 + %add623 = add i32 %add621, %xor620 + %add588 = add i32 %load92, %E.12219 + %add590 = add i32 %add588, %add623 + %add624 = add i32 %add590, %xor587 + %add625 = add i32 %add624, %xor584 + %add626 = add i32 %add625, %A.12215 + %or629 = tail call i32 @llvm.fshl.i32(i32 %add570, i32 %add570, i32 30) + %or632 = tail call i32 @llvm.fshl.i32(i32 %add570, i32 %add570, i32 19) + %xor633 = xor i32 %or629, %or632 + %or636 = tail call i32 @llvm.fshl.i32(i32 %add570, i32 %add570, i32 10) + %xor637 = xor i32 %xor633, %or636 + %and638 = and i32 %add570, %add497 + %xor639 = xor i32 %add570, %add497 + %and640 = and i32 %xor639, %add477 + %xor641 = xor i32 %and640, %and638 + %add642 = add i32 %xor637, %xor641 + %add643 = add i32 %add642, %add625 + %or649 = tail call i32 @llvm.fshl.i32(i32 %add626, i32 %add626, i32 26) + %or652 = tail call i32 @llvm.fshl.i32(i32 %add626, i32 %add626, i32 21) + %xor653 = xor i32 %or649, %or652 + %or656 = tail call i32 @llvm.fshl.i32(i32 %add626, i32 %add626, i32 7) + %xor657 = xor i32 %xor653, %or656 + %xor658 = xor i32 %add553, %add480 + %and659 = and i32 %add626, %xor658 + %xor660 = xor i32 %and659, %add480 + %arrayidx662 = getelementptr inbounds i32, ptr %k.addr.12214, i32 4 + %load93 = load i32, ptr %arrayidx662 + %or668 = tail call i32 @llvm.fshl.i32(i32 %add550, i32 %add550, i32 15) + %or673 = tail call i32 @llvm.fshl.i32(i32 %add550, i32 %add550, i32 13) + %xor674 = xor i32 %or668, %or673 + %shr676 = lshr i32 %add550, 10 + %xor677 = xor i32 %xor674, %shr676 + %or684 = tail call i32 @llvm.fshl.i32(i32 %add7692242, i32 %add7692242, i32 25) + %or689 = tail call i32 @llvm.fshl.i32(i32 %add7692242, i32 %add7692242, i32 14) + %xor690 = xor i32 %or684, %or689 + %shr692 = lshr i32 %add7692242, 3 + %xor693 = xor i32 %xor690, %shr692 + %add679 = add i32 %add13532241, %add6962240 + %add694 = add i32 %add679, %xor677 + %add696 = add i32 %add694, %xor693 + %add661 = add i32 %load93, %add404 + %add663 = add i32 %add661, %add696 + %add697 = add i32 %add663, %xor660 + %add698 = add i32 %add697, %xor657 + %add699 = add i32 %add698, %add477 + %or702 = tail call i32 @llvm.fshl.i32(i32 %add643, i32 %add643, i32 30) + %or705 = tail call i32 @llvm.fshl.i32(i32 
%add643, i32 %add643, i32 19) + %xor706 = xor i32 %or702, %or705 + %or709 = tail call i32 @llvm.fshl.i32(i32 %add643, i32 %add643, i32 10) + %xor710 = xor i32 %xor706, %or709 + %and711 = and i32 %add643, %add570 + %xor712 = xor i32 %add643, %add570 + %and713 = and i32 %xor712, %add497 + %xor714 = xor i32 %and713, %and711 + %add715 = add i32 %xor710, %xor714 + %add716 = add i32 %add715, %add698 + %or722 = tail call i32 @llvm.fshl.i32(i32 %add699, i32 %add699, i32 26) + %or725 = tail call i32 @llvm.fshl.i32(i32 %add699, i32 %add699, i32 21) + %xor726 = xor i32 %or722, %or725 + %or729 = tail call i32 @llvm.fshl.i32(i32 %add699, i32 %add699, i32 7) + %xor730 = xor i32 %xor726, %or729 + %xor731 = xor i32 %add626, %add553 + %and732 = and i32 %add699, %xor731 + %xor733 = xor i32 %and732, %add553 + %arrayidx735 = getelementptr inbounds i32, ptr %k.addr.12214, i32 5 + %load94 = load i32, ptr %arrayidx735 + %or741 = tail call i32 @llvm.fshl.i32(i32 %add623, i32 %add623, i32 15) + %or746 = tail call i32 @llvm.fshl.i32(i32 %add623, i32 %add623, i32 13) + %xor747 = xor i32 %or741, %or746 + %shr749 = lshr i32 %add623, 10 + %xor750 = xor i32 %xor747, %shr749 + %or757 = tail call i32 @llvm.fshl.i32(i32 %add8422243, i32 %add8422243, i32 25) + %or762 = tail call i32 @llvm.fshl.i32(i32 %add8422243, i32 %add8422243, i32 14) + %xor763 = xor i32 %or757, %or762 + %shr765 = lshr i32 %add8422243, 3 + %xor766 = xor i32 %xor763, %shr765 + %add752 = add i32 %add7692242, %add14262231 + %add767 = add i32 %add752, %xor750 + %add769 = add i32 %add767, %xor766 + %add734 = add i32 %load94, %add480 + %add736 = add i32 %add734, %add769 + %add770 = add i32 %add736, %xor733 + %add771 = add i32 %add770, %xor730 + %add772 = add i32 %add771, %add497 + %or775 = tail call i32 @llvm.fshl.i32(i32 %add716, i32 %add716, i32 30) + %or778 = tail call i32 @llvm.fshl.i32(i32 %add716, i32 %add716, i32 19) + %xor779 = xor i32 %or775, %or778 + %or782 = tail call i32 @llvm.fshl.i32(i32 %add716, i32 %add716, i32 10) + %xor783 = xor i32 %xor779, %or782 + %and784 = and i32 %add716, %add643 + %xor785 = xor i32 %add716, %add643 + %and786 = and i32 %xor785, %add570 + %xor787 = xor i32 %and786, %and784 + %add788 = add i32 %xor783, %xor787 + %add789 = add i32 %add788, %add771 + %or795 = tail call i32 @llvm.fshl.i32(i32 %add772, i32 %add772, i32 26) + %or798 = tail call i32 @llvm.fshl.i32(i32 %add772, i32 %add772, i32 21) + %xor799 = xor i32 %or795, %or798 + %or802 = tail call i32 @llvm.fshl.i32(i32 %add772, i32 %add772, i32 7) + %xor803 = xor i32 %xor799, %or802 + %xor804 = xor i32 %add699, %add626 + %and805 = and i32 %add772, %xor804 + %xor806 = xor i32 %and805, %add626 + %arrayidx808 = getelementptr inbounds i32, ptr %k.addr.12214, i32 6 + %load95 = load i32, ptr %arrayidx808 + %or814 = tail call i32 @llvm.fshl.i32(i32 %add696, i32 %add696, i32 15) + %or819 = tail call i32 @llvm.fshl.i32(i32 %add696, i32 %add696, i32 13) + %xor820 = xor i32 %or814, %or819 + %shr822 = lshr i32 %add696, 10 + %xor823 = xor i32 %xor820, %shr822 + %or830 = tail call i32 @llvm.fshl.i32(i32 %add9152244, i32 %add9152244, i32 25) + %or835 = tail call i32 @llvm.fshl.i32(i32 %add9152244, i32 %add9152244, i32 14) + %xor836 = xor i32 %or830, %or835 + %shr838 = lshr i32 %add9152244, 3 + %xor839 = xor i32 %xor836, %shr838 + %add825 = add i32 %add8422243, %add14992234 + %add840 = add i32 %add825, %xor823 + %add842 = add i32 %add840, %xor839 + %add807 = add i32 %add553, %load95 + %add809 = add i32 %add807, %add842 + %add843 = add i32 %add809, %xor806 + %add844 = add i32 %add843, 
%xor803 + %add845 = add i32 %add844, %add570 + %or848 = tail call i32 @llvm.fshl.i32(i32 %add789, i32 %add789, i32 30) + %or851 = tail call i32 @llvm.fshl.i32(i32 %add789, i32 %add789, i32 19) + %xor852 = xor i32 %or848, %or851 + %or855 = tail call i32 @llvm.fshl.i32(i32 %add789, i32 %add789, i32 10) + %xor856 = xor i32 %xor852, %or855 + %and857 = and i32 %add789, %add716 + %xor858 = xor i32 %add789, %add716 + %and859 = and i32 %xor858, %add643 + %xor860 = xor i32 %and859, %and857 + %add861 = add i32 %xor856, %xor860 + %add862 = add i32 %add861, %add844 + %or868 = tail call i32 @llvm.fshl.i32(i32 %add845, i32 %add845, i32 26) + %or871 = tail call i32 @llvm.fshl.i32(i32 %add845, i32 %add845, i32 21) + %xor872 = xor i32 %or868, %or871 + %or875 = tail call i32 @llvm.fshl.i32(i32 %add845, i32 %add845, i32 7) + %xor876 = xor i32 %xor872, %or875 + %xor877 = xor i32 %add772, %add699 + %and878 = and i32 %add845, %xor877 + %xor879 = xor i32 %and878, %add699 + %arrayidx881 = getelementptr inbounds i32, ptr %k.addr.12214, i32 7 + %load96 = load i32, ptr %arrayidx881 + %or887 = tail call i32 @llvm.fshl.i32(i32 %add769, i32 %add769, i32 15) + %or892 = tail call i32 @llvm.fshl.i32(i32 %add769, i32 %add769, i32 13) + %xor893 = xor i32 %or887, %or892 + %shr895 = lshr i32 %add769, 10 + %xor896 = xor i32 %xor893, %shr895 + %or903 = tail call i32 @llvm.fshl.i32(i32 %add9882245, i32 %add9882245, i32 25) + %or908 = tail call i32 @llvm.fshl.i32(i32 %add9882245, i32 %add9882245, i32 14) + %xor909 = xor i32 %or903, %or908 + %shr911 = lshr i32 %add9882245, 3 + %xor912 = xor i32 %xor909, %shr911 + %add898 = add i32 %add9152244, %add404 + %add913 = add i32 %add898, %xor896 + %add915 = add i32 %add913, %xor912 + %add880 = add i32 %add915, %load96 + %add882 = add i32 %add880, %add626 + %add916 = add i32 %add882, %xor879 + %add917 = add i32 %add916, %xor876 + %add918 = add i32 %add917, %add643 + %or921 = tail call i32 @llvm.fshl.i32(i32 %add862, i32 %add862, i32 30) + %or924 = tail call i32 @llvm.fshl.i32(i32 %add862, i32 %add862, i32 19) + %xor925 = xor i32 %or921, %or924 + %or928 = tail call i32 @llvm.fshl.i32(i32 %add862, i32 %add862, i32 10) + %xor929 = xor i32 %xor925, %or928 + %and930 = and i32 %add862, %add789 + %xor931 = xor i32 %add862, %add789 + %and932 = and i32 %xor931, %add716 + %xor933 = xor i32 %and932, %and930 + %add934 = add i32 %xor929, %xor933 + %add935 = add i32 %add934, %add917 + %or941 = tail call i32 @llvm.fshl.i32(i32 %add918, i32 %add918, i32 26) + %or944 = tail call i32 @llvm.fshl.i32(i32 %add918, i32 %add918, i32 21) + %xor945 = xor i32 %or941, %or944 + %or948 = tail call i32 @llvm.fshl.i32(i32 %add918, i32 %add918, i32 7) + %xor949 = xor i32 %xor945, %or948 + %xor950 = xor i32 %add845, %add772 + %and951 = and i32 %add918, %xor950 + %xor952 = xor i32 %and951, %add772 + %arrayidx954 = getelementptr inbounds i32, ptr %k.addr.12214, i32 8 + %load97 = load i32, ptr %arrayidx954 + %or960 = tail call i32 @llvm.fshl.i32(i32 %add842, i32 %add842, i32 15) + %or965 = tail call i32 @llvm.fshl.i32(i32 %add842, i32 %add842, i32 13) + %xor966 = xor i32 %or960, %or965 + %shr968 = lshr i32 %add842, 10 + %xor969 = xor i32 %xor966, %shr968 + %or976 = tail call i32 @llvm.fshl.i32(i32 %add10612232, i32 %add10612232, i32 25) + %or981 = tail call i32 @llvm.fshl.i32(i32 %add10612232, i32 %add10612232, i32 14) + %xor982 = xor i32 %or976, %or981 + %shr984 = lshr i32 %add10612232, 3 + %xor985 = xor i32 %xor982, %shr984 + %add971 = add i32 %add477, %xor985 + %add986 = add i32 %add971, %add9882245 + %add988 = add i32 
%add986, %xor969 + %add953 = add i32 %add988, %load97 + %add955 = add i32 %add953, %add699 + %add989 = add i32 %add955, %xor952 + %add990 = add i32 %add989, %xor949 + %add991 = add i32 %add990, %add716 + %or994 = tail call i32 @llvm.fshl.i32(i32 %add935, i32 %add935, i32 30) + %or997 = tail call i32 @llvm.fshl.i32(i32 %add935, i32 %add935, i32 19) + %xor998 = xor i32 %or994, %or997 + %or1001 = tail call i32 @llvm.fshl.i32(i32 %add935, i32 %add935, i32 10) + %xor1002 = xor i32 %xor998, %or1001 + %and1003 = and i32 %add935, %add862 + %xor1004 = xor i32 %add935, %add862 + %and1005 = and i32 %xor1004, %add789 + %xor1006 = xor i32 %and1005, %and1003 + %add1007 = add i32 %xor1002, %xor1006 + %add1008 = add i32 %add1007, %add990 + %or1014 = tail call i32 @llvm.fshl.i32(i32 %add991, i32 %add991, i32 26) + %or1017 = tail call i32 @llvm.fshl.i32(i32 %add991, i32 %add991, i32 21) + %xor1018 = xor i32 %or1014, %or1017 + %or1021 = tail call i32 @llvm.fshl.i32(i32 %add991, i32 %add991, i32 7) + %xor1022 = xor i32 %xor1018, %or1021 + %xor1023 = xor i32 %add918, %add845 + %and1024 = and i32 %add991, %xor1023 + %xor1025 = xor i32 %and1024, %add845 + %arrayidx1027 = getelementptr inbounds i32, ptr %k.addr.12214, i32 9 + %load98 = load i32, ptr %arrayidx1027 + %or1033 = tail call i32 @llvm.fshl.i32(i32 %add915, i32 %add915, i32 15) + %or1038 = tail call i32 @llvm.fshl.i32(i32 %add915, i32 %add915, i32 13) + %xor1039 = xor i32 %or1033, %or1038 + %shr1041 = lshr i32 %add915, 10 + %xor1042 = xor i32 %xor1039, %shr1041 + %or1049 = tail call i32 @llvm.fshl.i32(i32 %add11342235, i32 %add11342235, i32 25) + %or1054 = tail call i32 @llvm.fshl.i32(i32 %add11342235, i32 %add11342235, i32 14) + %xor1055 = xor i32 %or1049, %or1054 + %shr1057 = lshr i32 %add11342235, 3 + %xor1058 = xor i32 %xor1055, %shr1057 + %add1044 = add i32 %xor1058, %add10612232 + %add1059 = add i32 %add1044, %add550 + %add1061 = add i32 %add1059, %xor1042 + %add1026 = add i32 %add1061, %load98 + %add1028 = add i32 %add1026, %add772 + %add1062 = add i32 %add1028, %xor1025 + %add1063 = add i32 %add1062, %xor1022 + %add1064 = add i32 %add1063, %add789 + %or1067 = tail call i32 @llvm.fshl.i32(i32 %add1008, i32 %add1008, i32 30) + %or1070 = tail call i32 @llvm.fshl.i32(i32 %add1008, i32 %add1008, i32 19) + %xor1071 = xor i32 %or1067, %or1070 + %or1074 = tail call i32 @llvm.fshl.i32(i32 %add1008, i32 %add1008, i32 10) + %xor1075 = xor i32 %xor1071, %or1074 + %and1076 = and i32 %add1008, %add935 + %xor1077 = xor i32 %add1008, %add935 + %and1078 = and i32 %xor1077, %add862 + %xor1079 = xor i32 %and1078, %and1076 + %add1080 = add i32 %xor1075, %xor1079 + %add1081 = add i32 %add1080, %add1063 + %or1087 = tail call i32 @llvm.fshl.i32(i32 %add1064, i32 %add1064, i32 26) + %or1090 = tail call i32 @llvm.fshl.i32(i32 %add1064, i32 %add1064, i32 21) + %xor1091 = xor i32 %or1087, %or1090 + %or1094 = tail call i32 @llvm.fshl.i32(i32 %add1064, i32 %add1064, i32 7) + %xor1095 = xor i32 %xor1091, %or1094 + %xor1096 = xor i32 %add991, %add918 + %and1097 = and i32 %add1064, %xor1096 + %xor1098 = xor i32 %and1097, %add918 + %arrayidx1100 = getelementptr inbounds i32, ptr %k.addr.12214, i32 10 + %load99 = load i32, ptr %arrayidx1100 + %or1106 = tail call i32 @llvm.fshl.i32(i32 %add988, i32 %add988, i32 15) + %or1111 = tail call i32 @llvm.fshl.i32(i32 %add988, i32 %add988, i32 13) + %xor1112 = xor i32 %or1106, %or1111 + %shr1114 = lshr i32 %add988, 10 + %xor1115 = xor i32 %xor1112, %shr1114 + %or1122 = tail call i32 @llvm.fshl.i32(i32 %add12072237, i32 %add12072237, i32 
25) + %or1127 = tail call i32 @llvm.fshl.i32(i32 %add12072237, i32 %add12072237, i32 14) + %xor1128 = xor i32 %or1122, %or1127 + %shr1130 = lshr i32 %add12072237, 3 + %xor1131 = xor i32 %xor1128, %shr1130 + %add1117 = add i32 %xor1131, %add11342235 + %add1132 = add i32 %add1117, %add623 + %add1134 = add i32 %add1132, %xor1115 + %add1099 = add i32 %add1134, %load99 + %add1101 = add i32 %add1099, %add845 + %add1135 = add i32 %add1101, %xor1098 + %add1136 = add i32 %add1135, %xor1095 + %add1137 = add i32 %add1136, %add862 + %or1140 = tail call i32 @llvm.fshl.i32(i32 %add1081, i32 %add1081, i32 30) + %or1143 = tail call i32 @llvm.fshl.i32(i32 %add1081, i32 %add1081, i32 19) + %xor1144 = xor i32 %or1140, %or1143 + %or1147 = tail call i32 @llvm.fshl.i32(i32 %add1081, i32 %add1081, i32 10) + %xor1148 = xor i32 %xor1144, %or1147 + %and1149 = and i32 %add1081, %add1008 + %xor1150 = xor i32 %add1081, %add1008 + %and1151 = and i32 %xor1150, %add935 + %xor1152 = xor i32 %and1151, %and1149 + %add1153 = add i32 %xor1148, %xor1152 + %add1154 = add i32 %add1153, %add1136 + %or1160 = tail call i32 @llvm.fshl.i32(i32 %add1137, i32 %add1137, i32 26) + %or1163 = tail call i32 @llvm.fshl.i32(i32 %add1137, i32 %add1137, i32 21) + %xor1164 = xor i32 %or1160, %or1163 + %or1167 = tail call i32 @llvm.fshl.i32(i32 %add1137, i32 %add1137, i32 7) + %xor1168 = xor i32 %xor1164, %or1167 + %xor1169 = xor i32 %add1064, %add991 + %and1170 = and i32 %add1137, %xor1169 + %xor1171 = xor i32 %and1170, %add991 + %arrayidx1173 = getelementptr inbounds i32, ptr %k.addr.12214, i32 11 + %load100 = load i32, ptr %arrayidx1173 + %or1179 = tail call i32 @llvm.fshl.i32(i32 %add1061, i32 %add1061, i32 15) + %or1184 = tail call i32 @llvm.fshl.i32(i32 %add1061, i32 %add1061, i32 13) + %xor1185 = xor i32 %or1179, %or1184 + %shr1187 = lshr i32 %add1061, 10 + %xor1188 = xor i32 %xor1185, %shr1187 + %or1195 = tail call i32 @llvm.fshl.i32(i32 %add12802239, i32 %add12802239, i32 25) + %or1200 = tail call i32 @llvm.fshl.i32(i32 %add12802239, i32 %add12802239, i32 14) + %xor1201 = xor i32 %or1195, %or1200 + %shr1203 = lshr i32 %add12802239, 3 + %xor1204 = xor i32 %xor1201, %shr1203 + %add1190 = add i32 %xor1204, %add12072237 + %add1205 = add i32 %add1190, %add696 + %add1207 = add i32 %add1205, %xor1188 + %add1172 = add i32 %add1207, %load100 + %add1174 = add i32 %add1172, %add918 + %add1208 = add i32 %add1174, %xor1171 + %add1209 = add i32 %add1208, %xor1168 + %add1210 = add i32 %add1209, %add935 + %or1213 = tail call i32 @llvm.fshl.i32(i32 %add1154, i32 %add1154, i32 30) + %or1216 = tail call i32 @llvm.fshl.i32(i32 %add1154, i32 %add1154, i32 19) + %xor1217 = xor i32 %or1213, %or1216 + %or1220 = tail call i32 @llvm.fshl.i32(i32 %add1154, i32 %add1154, i32 10) + %xor1221 = xor i32 %xor1217, %or1220 + %and1222 = and i32 %add1154, %add1081 + %xor1223 = xor i32 %add1154, %add1081 + %and1224 = and i32 %xor1223, %add1008 + %xor1225 = xor i32 %and1224, %and1222 + %add1226 = add i32 %xor1221, %xor1225 + %add1227 = add i32 %add1226, %add1209 + %or1233 = tail call i32 @llvm.fshl.i32(i32 %add1210, i32 %add1210, i32 26) + %or1236 = tail call i32 @llvm.fshl.i32(i32 %add1210, i32 %add1210, i32 21) + %xor1237 = xor i32 %or1233, %or1236 + %or1240 = tail call i32 @llvm.fshl.i32(i32 %add1210, i32 %add1210, i32 7) + %xor1241 = xor i32 %xor1237, %or1240 + %xor1242 = xor i32 %add1137, %add1064 + %and1243 = and i32 %add1210, %xor1242 + %xor1244 = xor i32 %and1243, %add1064 + %arrayidx1246 = getelementptr inbounds i32, ptr %k.addr.12214, i32 12 + %load101 = load i32, 
ptr %arrayidx1246 + %or1252 = tail call i32 @llvm.fshl.i32(i32 %add1134, i32 %add1134, i32 15) + %or1257 = tail call i32 @llvm.fshl.i32(i32 %add1134, i32 %add1134, i32 13) + %xor1258 = xor i32 %or1252, %or1257 + %shr1260 = lshr i32 %add1134, 10 + %xor1261 = xor i32 %xor1258, %shr1260 + %or1268 = tail call i32 @llvm.fshl.i32(i32 %add13532241, i32 %add13532241, i32 25) + %or1273 = tail call i32 @llvm.fshl.i32(i32 %add13532241, i32 %add13532241, i32 14) + %xor1274 = xor i32 %or1268, %or1273 + %shr1276 = lshr i32 %add13532241, 3 + %xor1277 = xor i32 %xor1274, %shr1276 + %add1263 = add i32 %xor1277, %add12802239 + %add1278 = add i32 %add1263, %add769 + %add1280 = add i32 %add1278, %xor1261 + %add1245 = add i32 %add1280, %load101 + %add1247 = add i32 %add1245, %add991 + %add1281 = add i32 %add1247, %xor1244 + %add1282 = add i32 %add1281, %xor1241 + %add1283 = add i32 %add1282, %add1008 + %or1286 = tail call i32 @llvm.fshl.i32(i32 %add1227, i32 %add1227, i32 30) + %or1289 = tail call i32 @llvm.fshl.i32(i32 %add1227, i32 %add1227, i32 19) + %xor1290 = xor i32 %or1286, %or1289 + %or1293 = tail call i32 @llvm.fshl.i32(i32 %add1227, i32 %add1227, i32 10) + %xor1294 = xor i32 %xor1290, %or1293 + %and1295 = and i32 %add1227, %add1154 + %xor1296 = xor i32 %add1227, %add1154 + %and1297 = and i32 %xor1296, %add1081 + %xor1298 = xor i32 %and1297, %and1295 + %add1299 = add i32 %xor1294, %xor1298 + %add1300 = add i32 %add1299, %add1282 + %or1306 = tail call i32 @llvm.fshl.i32(i32 %add1283, i32 %add1283, i32 26) + %or1309 = tail call i32 @llvm.fshl.i32(i32 %add1283, i32 %add1283, i32 21) + %xor1310 = xor i32 %or1306, %or1309 + %or1313 = tail call i32 @llvm.fshl.i32(i32 %add1283, i32 %add1283, i32 7) + %xor1314 = xor i32 %xor1310, %or1313 + %xor1315 = xor i32 %add1210, %add1137 + %and1316 = and i32 %add1283, %xor1315 + %xor1317 = xor i32 %and1316, %add1137 + %arrayidx1319 = getelementptr inbounds i32, ptr %k.addr.12214, i32 13 + %load102 = load i32, ptr %arrayidx1319 + %or1325 = tail call i32 @llvm.fshl.i32(i32 %add1207, i32 %add1207, i32 15) + %or1330 = tail call i32 @llvm.fshl.i32(i32 %add1207, i32 %add1207, i32 13) + %xor1331 = xor i32 %or1325, %or1330 + %shr1333 = lshr i32 %add1207, 10 + %xor1334 = xor i32 %xor1331, %shr1333 + %or1341 = tail call i32 @llvm.fshl.i32(i32 %add14262231, i32 %add14262231, i32 25) + %or1346 = tail call i32 @llvm.fshl.i32(i32 %add14262231, i32 %add14262231, i32 14) + %xor1347 = xor i32 %or1341, %or1346 + %shr1349 = lshr i32 %add14262231, 3 + %xor1350 = xor i32 %xor1347, %shr1349 + %add1336 = add i32 %add13532241, %xor1350 + %add1351 = add i32 %add1336, %add842 + %add1353 = add i32 %add1351, %xor1334 + %add1318 = add i32 %add1353, %load102 + %add1320 = add i32 %add1318, %add1064 + %add1354 = add i32 %add1320, %xor1317 + %add1355 = add i32 %add1354, %xor1314 + %add1356 = add i32 %add1355, %add1081 + %or1359 = tail call i32 @llvm.fshl.i32(i32 %add1300, i32 %add1300, i32 30) + %or1362 = tail call i32 @llvm.fshl.i32(i32 %add1300, i32 %add1300, i32 19) + %xor1363 = xor i32 %or1359, %or1362 + %or1366 = tail call i32 @llvm.fshl.i32(i32 %add1300, i32 %add1300, i32 10) + %xor1367 = xor i32 %xor1363, %or1366 + %and1368 = and i32 %add1300, %add1227 + %xor1369 = xor i32 %add1300, %add1227 + %and1370 = and i32 %xor1369, %add1154 + %xor1371 = xor i32 %and1370, %and1368 + %add1372 = add i32 %xor1367, %xor1371 + %add1373 = add i32 %add1372, %add1355 + %or1379 = tail call i32 @llvm.fshl.i32(i32 %add1356, i32 %add1356, i32 26) + %or1382 = tail call i32 @llvm.fshl.i32(i32 %add1356, i32 %add1356, i32 
21) + %xor1383 = xor i32 %or1379, %or1382 + %or1386 = tail call i32 @llvm.fshl.i32(i32 %add1356, i32 %add1356, i32 7) + %xor1387 = xor i32 %xor1383, %or1386 + %xor1388 = xor i32 %add1283, %add1210 + %and1389 = and i32 %add1356, %xor1388 + %xor1390 = xor i32 %and1389, %add1210 + %arrayidx1392 = getelementptr inbounds i32, ptr %k.addr.12214, i32 14 + %load103 = load i32, ptr %arrayidx1392 + %or1398 = tail call i32 @llvm.fshl.i32(i32 %add1280, i32 %add1280, i32 15) + %or1403 = tail call i32 @llvm.fshl.i32(i32 %add1280, i32 %add1280, i32 13) + %xor1404 = xor i32 %or1398, %or1403 + %shr1406 = lshr i32 %add1280, 10 + %xor1407 = xor i32 %xor1404, %shr1406 + %or1414 = tail call i32 @llvm.fshl.i32(i32 %add14992234, i32 %add14992234, i32 25) + %or1419 = tail call i32 @llvm.fshl.i32(i32 %add14992234, i32 %add14992234, i32 14) + %xor1420 = xor i32 %or1414, %or1419 + %shr1422 = lshr i32 %add14992234, 3 + %xor1423 = xor i32 %xor1420, %shr1422 + %add1409 = add i32 %xor1423, %add14262231 + %add1424 = add i32 %add1409, %add915 + %add1426 = add i32 %add1424, %xor1407 + %add1391 = add i32 %add1426, %load103 + %add1393 = add i32 %add1391, %add1137 + %add1427 = add i32 %add1393, %xor1390 + %add1428 = add i32 %add1427, %xor1387 + %add1429 = add i32 %add1428, %add1154 + %or1432 = tail call i32 @llvm.fshl.i32(i32 %add1373, i32 %add1373, i32 30) + %or1435 = tail call i32 @llvm.fshl.i32(i32 %add1373, i32 %add1373, i32 19) + %xor1436 = xor i32 %or1432, %or1435 + %or1439 = tail call i32 @llvm.fshl.i32(i32 %add1373, i32 %add1373, i32 10) + %xor1440 = xor i32 %xor1436, %or1439 + %and1441 = and i32 %add1373, %add1300 + %xor1442 = xor i32 %add1373, %add1300 + %and1443 = and i32 %xor1442, %add1227 + %xor1444 = xor i32 %and1443, %and1441 + %add1445 = add i32 %xor1440, %xor1444 + %add1446 = add i32 %add1445, %add1428 + %or1452 = tail call i32 @llvm.fshl.i32(i32 %add1429, i32 %add1429, i32 26) + %or1455 = tail call i32 @llvm.fshl.i32(i32 %add1429, i32 %add1429, i32 21) + %xor1456 = xor i32 %or1452, %or1455 + %or1459 = tail call i32 @llvm.fshl.i32(i32 %add1429, i32 %add1429, i32 7) + %xor1460 = xor i32 %xor1456, %or1459 + %xor1461 = xor i32 %add1356, %add1283 + %and1462 = and i32 %add1429, %xor1461 + %xor1463 = xor i32 %and1462, %add1283 + %arrayidx1465 = getelementptr inbounds i32, ptr %k.addr.12214, i32 15 + %load104 = load i32, ptr %arrayidx1465 + %or1471 = tail call i32 @llvm.fshl.i32(i32 %add1353, i32 %add1353, i32 15) + %or1476 = tail call i32 @llvm.fshl.i32(i32 %add1353, i32 %add1353, i32 13) + %xor1477 = xor i32 %or1471, %or1476 + %shr1479 = lshr i32 %add1353, 10 + %xor1480 = xor i32 %xor1477, %shr1479 + %or1487 = tail call i32 @llvm.fshl.i32(i32 %add404, i32 %add404, i32 25) + %or1492 = tail call i32 @llvm.fshl.i32(i32 %add404, i32 %add404, i32 14) + %xor1493 = xor i32 %or1487, %or1492 + %shr1495 = lshr i32 %add404, 3 + %xor1496 = xor i32 %xor1493, %shr1495 + %add1482 = add i32 %xor1496, %add14992234 + %add1497 = add i32 %add1482, %add988 + %add1499 = add i32 %add1497, %xor1480 + %add1464 = add i32 %add1499, %load104 + %add1466 = add i32 %add1464, %add1210 + %add1500 = add i32 %add1466, %xor1463 + %add1501 = add i32 %add1500, %xor1460 + %add1502 = add i32 %add1501, %add1227 + %or1505 = tail call i32 @llvm.fshl.i32(i32 %add1446, i32 %add1446, i32 30) + %or1508 = tail call i32 @llvm.fshl.i32(i32 %add1446, i32 %add1446, i32 19) + %xor1509 = xor i32 %or1505, %or1508 + %or1512 = tail call i32 @llvm.fshl.i32(i32 %add1446, i32 %add1446, i32 10) + %xor1513 = xor i32 %xor1509, %or1512 + %and1514 = and i32 %add1446, %add1373 + 
%xor1515 = xor i32 %add1446, %add1373 + %and1516 = and i32 %xor1515, %add1300 + %xor1517 = xor i32 %and1516, %and1514 + %add1518 = add i32 %xor1513, %xor1517 + %add1519 = add i32 %add1518, %add1501 + %add1523 = add nuw nsw i32 %i.22223, 16 + %add.ptr1524 = getelementptr inbounds i32, ptr %k.addr.12214, i32 16 + %cmp351 = icmp ult i32 %i.22223, 48 + br i1 %cmp351, label %do.body, label %end + +end: + ret void +} + declare i8 @llvm.umin.i8(i8 %a, i8 %b) declare i16 @llvm.umin.i16(i16 %a, i16 %b) declare i32 @llvm.umin.i32(i32 %a, i32 %b) @@ -1092,6 +2377,7 @@ declare half @llvm.maxnum.f16(half, half) declare float @llvm.maxnum.f32(float, float) declare double @llvm.maxnum.f64(double, double) +declare i32 @llvm.fshl.i32(i32, i32, i32) define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) { ; CHECK_LOCAL-LABEL: test_fmadd_strategy: @@ -1100,12 +2386,12 @@ ; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1 ; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_LOCAL-NEXT: andi a0, a0, 1 -; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2 +; CHECK_LOCAL-NEXT: beqz a0, .LBB77_2 ; CHECK_LOCAL-NEXT: # %bb.1: # %entry ; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1 ; CHECK_LOCAL-NEXT: fmadd.d fa5, fa5, fa1, fa0 ; CHECK_LOCAL-NEXT: fsub.d fa0, fa5, fa4 -; CHECK_LOCAL-NEXT: .LBB76_2: # %entry +; CHECK_LOCAL-NEXT: .LBB77_2: # %entry ; CHECK_LOCAL-NEXT: ret ; ; CHECK_GLOBAL-LABEL: test_fmadd_strategy: @@ -1114,12 +2400,12 @@ ; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1 ; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_GLOBAL-NEXT: andi a0, a0, 1 -; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2 +; CHECK_GLOBAL-NEXT: beqz a0, .LBB77_2 ; CHECK_GLOBAL-NEXT: # %bb.1: # %entry ; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, fa1 ; CHECK_GLOBAL-NEXT: fadd.d fa4, fa5, fa0 ; CHECK_GLOBAL-NEXT: fsub.d fa0, fa4, fa5 -; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry +; CHECK_GLOBAL-NEXT: .LBB77_2: # %entry ; CHECK_GLOBAL-NEXT: ret entry: %sub = fsub contract double %a0, %a1