Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -134,6 +134,7 @@
 void initializeEarlyTailDuplicatePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEfficiencySanitizerPass(PassRegistry&);
+void initializeElevatingLegacyPassPass(PassRegistry&);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
 void initializeEntryExitInstrumenterPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -200,6 +200,7 @@
       (void) llvm::createModuleDebugInfoPrinterPass();
       (void) llvm::createPartialInliningPass();
       (void) llvm::createLintPass();
+      (void) llvm::createElevatingPass();
       (void) llvm::createSinkingPass();
       (void) llvm::createLowerAtomicPass();
       (void) llvm::createCorrelatedValuePropagationPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -341,6 +341,12 @@
 //===----------------------------------------------------------------------===//
 //
+// Elevate - Code Elevating
+//
+FunctionPass *createElevatingPass();
+
+//===----------------------------------------------------------------------===//
+//
 // Sink - Code Sinking
 //
 FunctionPass *createSinkingPass();
Index: include/llvm/Transforms/Scalar/Elevate.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Scalar/Elevate.h
@@ -0,0 +1,34 @@
+//===-- Elevate.h - Code Elevation ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into predecessor blocks, when possible, so that
+// we can do better constant folding. This is useful when variables are
+// assigned constant values in multiple predecessor blocks and an expression is
+// evaluated in the current block based on that. In that case, moving the
+// expression evaluation to the predecessor blocks makes it a compile time
+// constant folding operation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ELEVATE_H
+#define LLVM_TRANSFORMS_SCALAR_ELEVATE_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Move instructions into predecessor blocks when possible.
+class ElevatingPass : public PassInfoMixin<ElevatingPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ELEVATE_H
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -42,6 +42,7 @@
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Scalar/Elevate.h"
 
 using namespace llvm;
 
@@ -269,6 +270,12 @@
   FPM.add(createSROAPass());
   FPM.add(createEarlyCSEPass());
   FPM.add(createLowerExpectIntrinsicPass());
+  // Simplify the CFG first, then alternate elevation with constant
+  // propagation so that constants exposed by one round feed the next.
+  FPM.add(createCFGSimplificationPass(1, true, true, false, true));
+  FPM.add(createElevatingPass());
+  FPM.add(createConstantPropagationPass());
+  FPM.add(createElevatingPass());
 }
 
 // Do PGO instrumentation generation or use pass as the option specified.
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -10,6 +10,7 @@
   DeadStoreElimination.cpp
   DivRemPairs.cpp
   EarlyCSE.cpp
+  Elevate.cpp
   FlattenCFGPass.cpp
   Float2Int.cpp
   GuardWidening.cpp
Index: lib/Transforms/Scalar/Elevate.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/Elevate.cpp
@@ -0,0 +1,466 @@
+//===-- Elevate.cpp - Code Elevation --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into predecessor blocks, when possible, so that
+// we can do better constant folding. This is useful when variables are
+// assigned constant values in multiple predecessor blocks and an expression is
+// evaluated in the current block based on that. In that case, moving the
+// expression evaluation to the predecessor blocks makes it a compile time
+// constant folding operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/Elevate.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <vector>
+using namespace llvm;
+using std::vector;
+
+#define DEBUG_TYPE "elevate"
+
+STATISTIC(NumElevated, "Number of instructions elevated");
+STATISTIC(NumElevateIter, "Number of elevation iterations");
+
+/// Return true if it is safe to duplicate Inst into other blocks. Stores
+/// accumulates the store instructions seen so far in the current block.
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
+                         SmallPtrSetImpl<Instruction *> &Stores) {
+
+  if (Inst->mayWriteToMemory()) {
+    Stores.insert(Inst);
+    return false;
+  }
+
+  if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+    MemoryLocation Loc = MemoryLocation::get(L);
+    for (Instruction *S : Stores)
+      if (isModSet(AA.getModRefInfo(S, Loc)))
+        return false;
+  }
+
+  if (Inst->isTerminator() || isa<PHINode>(Inst) || Inst->isEHPad() ||
+      Inst->mayThrow())
+    return false;
+
+  if (auto CS = CallSite(Inst)) {
+    // Convergent operations cannot be made control-dependent on additional
+    // values.
+    if (CS.hasFnAttr(Attribute::Convergent))
+      return false;
+
+    for (Instruction *S : Stores)
+      if (isModSet(AA.getModRefInfo(S, CS)))
+        return false;
+  }
+
+  return true;
+}
+
+/// IsAcceptableTarget - Return true if it is possible to elevate the
+/// instruction to the specified basic block.
+static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *PredToElevateTo,
+                               DominatorTree &DT, LoopInfo &LI) {
+  assert(Inst && "Instruction to be elevated is null");
+  assert(PredToElevateTo && "Candidate elevate target is null");
+
+  // It is not possible to elevate an instruction into its own block. This can
+  // happen with loops.
+  if (Inst->getParent() == PredToElevateTo)
+    return false;
+
+  // It's never legal to elevate an instruction into a block which terminates
+  // in an EH-pad.
+  if (PredToElevateTo->getTerminator()->isExceptionalTerminator())
+    return false;
+
+  // We cannot elevate a load across a critical edge - there may be stores in
+  // other code paths.
+  if (Inst->mayReadFromMemory())
+    return false;
+
+  // Don't elevate instructions into a loop.
+  Loop *pred = LI.getLoopFor(PredToElevateTo);
+  Loop *cur = LI.getLoopFor(Inst->getParent());
+  if (cur != nullptr)
+    return false;
+  if (pred != nullptr && pred != cur)
+    return false;
+
+  // Finally, check that all the defs for the uses of the instruction are
+  // actually dominated by the candidate
+  // return AllDefsDominateBlock(Inst, PredToElevateTo, DT);
+  return true;
+}
+
+/// Return true if the current block's predecessor list agrees with every
+/// multi-entry vector of incoming blocks collected from the PHI operands.
+static bool blocksVectorMatches(vector<BasicBlock *> predBlocksVector,
+                                vector<vector<BasicBlock *>> inBlocksVector) {
+  if (predBlocksVector.size() == 0)
+    return false;
+  for (size_t i = 0; i < inBlocksVector.size(); i++)
+    if (inBlocksVector[i].size() == 0)
+      return false;
+
+  for (size_t i = 0; i < inBlocksVector.size(); i++)
+    if (predBlocksVector != inBlocksVector[i] &&
+        inBlocksVector[i].size() > 1)
+      return false;
+  return true;
+}
+
+/// Check that all multi-entry incoming-block vectors are identical to each
+/// other; single-entry vectors (constants and out-of-block defs) are ignored.
+static bool validateBlocksVector(vector<vector<BasicBlock *>> blocksVector) {
+  if (blocksVector.size() == 0)
+    return false;
+
+  vector<BasicBlock *> CurBlocksVector;
+  size_t i;
+  // Guard the [1] access: a lone single-entry vector has nothing to compare.
+  if (blocksVector[0].size() == 1 && blocksVector.size() > 1) {
+    CurBlocksVector = blocksVector[1];
+    i = 2;
+  } else {
+    CurBlocksVector = blocksVector[0];
+    i = 1;
+  }
+  if (CurBlocksVector.size() <= 1)
+    return false;
+
+  for (; i < blocksVector.size(); i++)
+    if (blocksVector[i] != CurBlocksVector &&
+        blocksVector[i].size() > 1)
+      return false;
+  return true;
+}
+
+/// Collect the predecessors of Inst's block, reversed so that they line up
+/// with the order used for the PHI incoming values.
+static vector<BasicBlock *> getVectorOfPredBlocks(Instruction *Inst) {
+  vector<BasicBlock *> bbVector;
+
+  BasicBlock *curBlock = Inst->getParent();
+  for (pred_iterator PI = pred_begin(curBlock), E = pred_end(curBlock);
+       PI != E; PI++)
+    bbVector.push_back(*PI);
+
+  std::reverse(bbVector.begin(), bbVector.end());
+  return bbVector;
+}
+
+/// For each operand of Inst, collect the blocks its value(s) come from: the
+/// incoming blocks of a PHI operand, the defining block of an out-of-block
+/// instruction, or the current block for a ConstantInt operand.
+static vector<vector<BasicBlock *>>
+getVectorsOfIncomingBlocksOfPhiOperands(Instruction *Inst) {
+  vector<vector<BasicBlock *>> bbVectors;
+
+  for (Value *Op : Inst->operands()) {
+    Instruction *DefInst = dyn_cast<Instruction>(Op);
+    if (DefInst) {
+      if (PHINode *PN = dyn_cast<PHINode>(DefInst)) {
+        vector<BasicBlock *> bbVec;
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+          BasicBlock *bb = PN->getIncomingBlock(i);
+          bbVec.push_back(bb);
+        }
+        bbVectors.push_back(bbVec);
+      } else if (DefInst->getParent() != Inst->getParent()) {
+        vector<BasicBlock *> bbVec;
+        bbVec.push_back(DefInst->getParent());
+        bbVectors.push_back(bbVec);
+      }
+    } else {
+      ConstantInt *CI = dyn_cast<ConstantInt>(Op);
+      if (CI) {
+        vector<BasicBlock *> bbVec;
+        bbVec.push_back(Inst->getParent());
+        bbVectors.push_back(bbVec);
+      }
+    }
+  }
+
+  return bbVectors;
+}
+
+/// Check that every operand of Inst is either a PHI whose incoming values are
+/// constants (or selects of constants, or defs from other blocks), a def from
+/// another block, a ConstantInt, or a function argument. On success, Vals
+/// holds the per-predecessor replacement values for each operand.
+static bool allOperandDefsArePhiWithConstOperands(Instruction *Inst,
+    vector<vector<Value *>> &Vals) {
+  unsigned j = 0;
+  bool SeenPHIOperand = false;
+  for (Value *Op : Inst->operands()) {
+    vector<Value *> V;
+    Vals.push_back(V);
+    Instruction *DefInst = dyn_cast<Instruction>(Op);
+    if (DefInst) {
+      if (PHINode *PN = dyn_cast<PHINode>(DefInst)) {
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+          Value *PHIOp = PN->getIncomingValue(i);
+          ConstantInt *CI = dyn_cast<ConstantInt>(PHIOp);
+          if (CI == NULL) {
+            SelectInst *Sel = dyn_cast<SelectInst>(PHIOp);
+            if (Sel == NULL) {
+              Instruction *OpInst = dyn_cast<Instruction>(PHIOp);
+              PHINode *PN1 = dyn_cast<PHINode>(PHIOp);
+              if (OpInst && OpInst->getParent() != Inst->getParent() &&
+                  PN1 == NULL)
+                Vals[j].push_back(PHIOp);
+              else
+                return false;
+            } else {
+              Value *TValue = Sel->getTrueValue();
+              Value *FValue = Sel->getFalseValue();
+              ConstantInt *CITrue = dyn_cast<ConstantInt>(TValue);
+              ConstantInt *CIFalse = dyn_cast<ConstantInt>(FValue);
+
+              if (CITrue && CIFalse)
+                Vals[j].push_back(PHIOp);
+              else
+                return false;
+            }
+          } else
+            Vals[j].push_back(PHIOp);
+        }
+        SeenPHIOperand = true;
+      } else if (DefInst->getParent() != Inst->getParent()) {
+        Vals[j].push_back(Op);
+      } else
+        return false;
+    } else if (isa<ConstantInt>(Op)) {
+      Vals[j].push_back(Op);
+    } else if (isa<Argument>(Op)) {
+      Vals[j].push_back(Op);
+    } else {
+      return false;
+    }
+    j++;
+  }
+
+  if (!SeenPHIOperand)
+    return false;
+
+  size_t i, MaxVecSize = 0;
+  for (i = 0; i < Vals.size(); i++)
+    if (Vals[i].size() > MaxVecSize)
+      MaxVecSize = Vals[i].size();
+
+  // Here we will take care of equalizing the number of elements only for a
+  // ConstantInt operand of Inst. If there are other PHI operands and the
+  // number of elements in the PHI operands do not match the MaxVecSize, we
+  // handle them in validateBlocksVector() and blocksVectorMatches().
+  for (i = 0; i < Vals.size(); i++)
+    if (Vals[i].size() == 1)
+      Vals[i].insert(Vals[i].end(), MaxVecSize - 1, Vals[i][0]);
+
+  return true;
+}
+
+/// ElevateInstruction - Determine whether it is safe to elevate the specified
+/// instruction out of its current block into its predecessors, and do so.
+static bool ElevateInstruction(Instruction *Inst,
+                               SmallPtrSetImpl<Instruction *> &Stores,
+                               DominatorTree &DT, LoopInfo &LI, AAResults &AA) {
+
+  // Don't elevate static alloca instructions. CodeGen assumes allocas outside
+  // the entry block are dynamically sized stack objects.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+    if (AI->isStaticAlloca())
+      return false;
+
+  // Check if it's safe to move the instruction.
+  if (!isSafeToMove(Inst, AA, Stores))
+    return false;
+
+  vector<vector<Value *>> PhiValues;
+  if (!allOperandDefsArePhiWithConstOperands(Inst, PhiValues))
+    return false;
+
+  vector<vector<BasicBlock *>> phiOperandsIncomingBlocksVector =
+      getVectorsOfIncomingBlocksOfPhiOperands(Inst);
+
+  if (!validateBlocksVector(phiOperandsIncomingBlocksVector))
+    return false;
+
+  vector<BasicBlock *> predBlocksVector = getVectorOfPredBlocks(Inst);
+
+  if (!blocksVectorMatches(predBlocksVector, phiOperandsIncomingBlocksVector))
+    return false;
+
+  for (BasicBlock *BB : predBlocksVector)
+    if (!IsAcceptableTarget(Inst, BB, DT, LI))
+      return false;
+
+  // Now we are ready to move the instruction to its predecessors. Iterate
+  // over the predecessors, and for the I-th predecessor, first duplicate the
+  // instruction and then replace the operands with the I-th definition of the
+  // operands in the vector of phi-defs. Then move the instruction to the end
+  // of the I-th predecessor. Once the instruction has been moved to all its
+  // predecessors, delete it from the current block.
+  PHINode *newPhi = PHINode::Create(Inst->getType(), 0, "phi.elevated.insn",
+                                    Inst->getParent()->getFirstNonPHI());
+  unsigned i = 0;
+  for (BasicBlock *BB : predBlocksVector) {
+    Instruction *newInst = Inst->clone();
+    for (unsigned j = 0; j < Inst->getNumOperands(); j++) {
+      Value *Val = PhiValues[j][i];
+      newInst->setOperand(j, Val);
+    }
+    newInst->insertBefore(BB->getTerminator());
+    newPhi->addIncoming(newInst, BB);
+    LLVM_DEBUG(dbgs() << "Elevate" << *Inst << " (";
+               Inst->getParent()->printAsOperand(dbgs(), false);
+               dbgs() << " -> ";
+               BB->printAsOperand(dbgs(), false);
+               dbgs() << " To " << *newInst;
+               dbgs() << ")\n";
+               dbgs() << *newPhi << "\n");
+    i++;
+  }
+
+  Inst->replaceAllUsesWith(newPhi);
+  Inst->removeFromParent();
+
+  return true;
+}
+
+/// Elevate eligible instructions out of BB into its predecessors; returns
+/// true if any instruction was moved.
+static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
+                         AAResults &AA) {
+  // No point in elevating anything out of a block that has less than two
+  // predecessors.
+  int numPreds = 0;
+  for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E; PI++)
+    numPreds++;
+  if (numPreds <= 1) return false;
+
+  // Don't bother elevating code out of unreachable blocks.
+  if (!DT.isReachableFromEntry(&BB)) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block top-down. Remember if we saw a store.
+  BasicBlock::iterator I = BB.begin();
+  SmallPtrSet<Instruction *, 8> Stores;
+  do {
+    Instruction *Inst = &*I; // The instruction to elevate.
+
+    // Preincrement I so that it isn't invalidated by elevating.
+    ++I;
+
+    // Skip PHIs, debug intrinsics and terminators up front.
+    // NOTE(review): the isa<> type arguments were lost when this patch was
+    // extracted; PHINode/DbgInfoIntrinsic plus the terminator test is the
+    // presumed intent -- confirm against the original change.
+    if (isa<PHINode>(Inst) || isa<DbgInfoIntrinsic>(Inst) ||
+        Inst->isTerminator())
+      continue;
+
+    if (ElevateInstruction(Inst, Stores, DT, LI, AA)) {
+      ++NumElevated;
+      MadeChange = true;
+      Inst->deleteValue(); // Since we have elevated the instruction we
+                           // delete it. We keep it alive till this point
+                           // so that any bookkeeping can take place, if
+                           // needed.
+    }
+
+    // If we just processed the first instruction in the block, we're done.
+  } while (I != BB.end());
+
+  return MadeChange;
+}
+
+/// Run ProcessBlock over the whole function until a fixed point is reached.
+static bool iterativelyElevateInstructions(Function &F, DominatorTree &DT,
+                                           LoopInfo &LI, AAResults &AA) {
+  bool MadeChange, EverMadeChange = false;
+
+  do {
+    MadeChange = false;
+    LLVM_DEBUG(dbgs() << "Elevating iteration " << NumElevateIter << "\n");
+    // Process all basic blocks.
+    for (BasicBlock &I : F)
+      MadeChange |= ProcessBlock(I, DT, LI, AA);
+    EverMadeChange |= MadeChange;
+    NumElevateIter++;
+  } while (MadeChange);
+
+  return EverMadeChange;
+}
+
+PreservedAnalyses ElevatingPass::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &AA = AM.getResult<AAManager>(F);
+
+  if (!iterativelyElevateInstructions(F, DT, LI, AA))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+namespace {
+  class ElevatingLegacyPass : public FunctionPass {
+  public:
+    static char ID; // Pass identification
+    ElevatingLegacyPass() : FunctionPass(ID) {
+      initializeElevatingLegacyPassPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F) override {
+      auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+      auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+      auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+      return iterativelyElevateInstructions(F, DT, LI, AA);
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AAResultsWrapperPass>();
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addRequired<LoopInfoWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<LoopInfoWrapperPass>();
+    }
+  };
+} // end anonymous namespace
+
+char ElevatingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ElevatingLegacyPass, "elevate", "Code elevation", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(ElevatingLegacyPass, "elevate", "Code elevation", false,
+                    false)
+
+FunctionPass *llvm::createElevatingPass() { return new ElevatingLegacyPass(); }
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -51,6 +51,7 @@
   initializeNewGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry); + initializeElevatingLegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); Index: test/Transforms/Elevate/elevate-if-else.ll =================================================================== --- /dev/null +++ test/Transforms/Elevate/elevate-if-else.ll @@ -0,0 +1,132 @@ +; RUN: opt < %s -O3 -elevate -S | FileCheck %s + +%struct.foo = type opaque +%struct.bar = type { i8*, i64, i64 } + +define void @foo_bar(%struct.foo* %p, i32 %c, %struct.bar* %o) { +entry: + %cmp = icmp slt i32 %c, 128 + br i1 %cmp, label %if.end8, label %if.else +; CHECK-LABEL: entry +; CHECK-NEXT: %cmp = icmp slt +; CHECK-NEXT: %0 = trunc i32 %c to i8 + +if.else: + %cmp1 = icmp slt i32 %c, 2048 + br i1 %cmp1, label %if.end8, label %if.else3 +; CHECK-LABEL: if.else +; CHECK: %cmp1 = icmp slt +; CHECK-NEXT: %1 = lshr i32 %c, 6 +; CHECK-NEXT: %2 = trunc i32 %1 to i8 +; CHECK-NEXT: %3 = or i8 %2, -64 + +if.else3: + %cmp4 = icmp slt i32 %c, 65536 + %. = select i1 %cmp4, i32 2, i32 3 + %.35 = select i1 %cmp4, i32 224, i32 240 + br label %if.end8 +; CHECK-LABEL: if.else3 +; CHECK: %cmp4 = icmp slt +; CHECK-NEXT: %. 
= select i1 %cmp4, i32 2, i32 3 +; CHECK-NEXT: %.35 = select i1 %cmp4, i32 224, i32 240 +; CHECK-NEXT: %4 = add nuw nsw i32 %., 1 +; CHECK-NEXT: %5 = zext i32 %4 to i64 +; CHECK-NEXT: %6 = mul nuw nsw i32 %., 6 +; CHECK-NEXT: %7 = lshr i32 %c, %6 +; CHECK-NEXT: %8 = or i32 %7, %.35 +; CHECK-NEXT: %9 = trunc i32 %8 to i8 + +if.end8: + %foobar.0 = phi i32 [ 0, %entry ], [ 1, %if.else ], [ %., %if.else3 ] + %prefix.0 = phi i32 [ 0, %entry ], [ 192, %if.else ], [ %.35, %if.else3 ] + %add = add nuw nsw i32 %foobar.0, 1 + %0 = zext i32 %add to i64 + tail call void @baz(%struct.foo* %p, i64 %0, %struct.bar* %o) + %mul = mul nuw nsw i32 %foobar.0, 6 + %shr = ashr i32 %c, %mul + %or = or i32 %shr, %prefix.0 + %conv9 = trunc i32 %or to i8 + %data = getelementptr inbounds %struct.bar, %struct.bar* %o, i64 0, i32 0 + %1 = load i8*, i8** %data + %length = getelementptr inbounds %struct.bar, %struct.bar* %o, i64 0, i32 1 + %2 = load i64, i64* %length + %inc = add i64 %2, 1 + store i64 %inc, i64* %length + %arrayidx = getelementptr inbounds i8, i8* %1, i64 %2 + store i8 %conv9, i8* %arrayidx + %cmp1037 = icmp eq i32 %foobar.0, 0 + br i1 %cmp1037, label %for.cond.cleanup, label %for.body + +; CHECK-LABEL: if.end8 +; CHECK: %foobar.0 = phi i32 [ 0, %entry ], [ 1, %if.else ], [ %., %if.else3 ] +; CHECK-NOT: %prefix.0 = phi i32 [ 0, %entry ], [ 192, %if.else ], [ %.35, %if.else3 ] +; CHECK-NOT: %add = add nuw nsw i32 %foobar.0, 1 +; CHECK-NOT: %0 = zext i32 %add to i64 +; CHECK-NOT: tail call void @baz(%struct.foo* %p, i64 %0, %struct.bar* %o) +; CHECK-NOT: %mul = mul nuw nsw i32 %foobar.0, 6 +; CHECK-NOT: %shr = ashr i32 %c, %mul +; CHECK-NOT: %or = or i32 %shr, %prefix.0 +; CHECK-NOT: %conv9 = trunc i32 %or to i8 +; CHECK: %phi.elevated.insn1 = phi i64 [ 1, %entry ], [ 2, %if.else ], [ %5, %if.else3 ] +; CHECK-NEXT: %phi.elevated.insn2 = phi i32 [ 0, %entry ], [ 6, %if.else ], [ %6, %if.else3 ] +; CHECK-NEXT: %phi.elevated.insn5 = phi i8 [ %0, %entry ], [ %3, %if.else ], [ %9, 
%if.else3 ] +; CHECK-NEXT: %phi.elevated.insn6 = phi i1 [ true, %entry ], [ false, %if.else ], [ false, %if.else3 ] +; CHECK-NOT: %cmp1037 = icmp eq i32 %foobar.0, 0 + +for.cond.cleanup: + ret void + +for.body: + %3 = mul nuw nsw i32 %foobar.0, 6 + %mul12 = add nsw i32 %3, -6 + %shr13 = ashr i32 %c, %mul12 + %4 = trunc i32 %shr13 to i8 + %5 = and i8 %4, 63 + %conv15 = or i8 %5, -128 + %6 = load i8*, i8** %data + %7 = load i64, i64* %length + %inc18 = add i64 %7, 1 + store i64 %inc18, i64* %length + %arrayidx19 = getelementptr inbounds i8, i8* %6, i64 %7 + store i8 %conv15, i8* %arrayidx19 + %cmp10 = icmp ugt i32 %foobar.0, 1 + br i1 %cmp10, label %for.body.1, label %for.cond.cleanup + +; CHECK-LABEL: for.body +; CHECK-NOT: %3 = mul nuw nsw i32 %foobar.0, 6 + +for.body.1: + %8 = mul nuw nsw i32 %foobar.0, 6 + %mul12.1 = add nsw i32 %8, -12 + %shr13.1 = ashr i32 %c, %mul12.1 + %9 = trunc i32 %shr13.1 to i8 + %10 = and i8 %9, 63 + %conv15.1 = or i8 %10, -128 + %11 = load i8*, i8** %data + %12 = load i64, i64* %length + %inc18.1 = add i64 %12, 1 + store i64 %inc18.1, i64* %length + %arrayidx19.1 = getelementptr inbounds i8, i8* %11, i64 %12 + store i8 %conv15.1, i8* %arrayidx19.1 + %cmp10.1 = icmp eq i32 %foobar.0, 3 + br i1 %cmp10.1, label %for.body.2, label %for.cond.cleanup + +; CHECK-LABEL: for.body.1 +; CHECK-NOT: %8 = mul nuw nsw i32 %foobar.0, 6 + +for.body.2: + %mul12.2 = add nsw i32 %8, -18 + %shr13.2 = ashr i32 %c, %mul12.2 + %13 = trunc i32 %shr13.2 to i8 + %14 = and i8 %13, 63 + %conv15.2 = or i8 %14, -128 + %15 = load i8*, i8** %data + %16 = load i64, i64* %length + %inc18.2 = add i64 %16, 1 + store i64 %inc18.2, i64* %length + %arrayidx19.2 = getelementptr inbounds i8, i8* %15, i64 %16 + store i8 %conv15.2, i8* %arrayidx19.2 + br label %for.cond.cleanup +} + +declare void @baz(%struct.foo*, i64, %struct.bar*)