diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6572,6 +6572,125 @@ return nullptr; } +/// Check whether the latch exit condition of \p L can be rewritten to test a +/// different header PHI, so that the PHI it currently tests becomes dead. +/// +/// Candidates are only searched in innermost loops that are in loop-simplify +/// and recursive LCSSA form, have a loop-invariant backedge-taken count, and +/// whose latch ends in a conditional branch on an eq/ne icmp. +/// +/// \returns {true, {ToFold, ToHelpFold}} when both PHIs were found; otherwise +/// the first member is false and the PHI pointers must not be relied upon. +static std::pair<bool, std::pair<PHINode *, PHINode *>> +canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, + const LoopInfo &LI) { + const std::pair<bool, std::pair<PHINode *, PHINode *>> CantFold = { + false, {nullptr, nullptr}}; + + // Only inspect on simple loop structure + if (!L->isLoopSimplifyForm() || !L->isRecursivelyLCSSAForm(DT, LI) || + !L->isInnermost()) + return CantFold; + + BasicBlock *LoopLatch = L->getLoopLatch(); + BasicBlock *LoopPreheader = L->getLoopPreheader(); + + if (!LoopLatch || !LoopPreheader || !SE.hasLoopInvariantBackedgeTakenCount(L)) + return CantFold; + + // TODO: Can we do something for greater than and less than? + // Terminal condition is foldable when it is an eq/neq icmp + // The latch terminator need not be a branch, so check before using it. + BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator()); + if (!BI || BI->isUnconditional()) + return CantFold; + Value *TermCond = BI->getCondition(); + if (!isa<ICmpInst>(TermCond)) + return CantFold; + if (!cast<ICmpInst>(TermCond)->isEquality()) + return CantFold; + + // PN is the IV to fold away: following its single-use chain must reach a + // value with exactly two uses, one leading back to a PHI and one leading to + // the latch condition. + std::function<bool(PHINode &)> IsToFold = [&](PHINode &PN) -> bool { + if (PN.getNumIncomingValues() != 2) + return false; + Value *V = cast<Value>(&PN); + + while (V->getNumUses() == 1) + V = *V->user_begin(); + + if (V->getNumUses() != 2) + return false; + + Value *VToPN = nullptr; + Value *VToTermCond = nullptr; + for (User *U : V->users()) { + while (U->getNumUses() == 1) { + if (isa<PHINode>(U)) + VToPN = U; + if (U == TermCond) + VToTermCond = U; + U = *U->user_begin(); + } + } + return VToPN && VToTermCond; + }; + + // PN can replace the folded IV in the exit test: it needs incoming values + // from both the preheader and the latch, and a GEP as its start value. + std::function<bool(PHINode &)> IsToHelpFold = [&](PHINode &PN) -> bool { + if (PN.getNumIncomingValues() != 2) + return false; + int FromPreheader = -1; + int FromLoopLatch = -1; + + for (unsigned I = 0; I < PN.getNumIncomingValues(); ++I) { + if (PN.getIncomingBlock(I) == LoopPreheader) + FromPreheader = I; + if (PN.getIncomingBlock(I) ==
LoopLatch) + FromLoopLatch = I; + } + + if (FromPreheader == -1 || FromLoopLatch == -1) + return false; + + Value *StartValue = PN.getIncomingValue(FromPreheader); + return isa<GetElementPtrInst>(StartValue); + }; + + PHINode *ToFold = nullptr; + PHINode *ToHelpFold = nullptr; + + for (PHINode &PN : L->getHeader()->phis()) { + if (!SE.isSCEVable(PN.getType())) + continue; + const SCEV *S = SE.getSCEV(&PN); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S); + // Only speculate on affine AddRec + if (!AddRec) + continue; + if (!AddRec->isAffine()) + continue; + + if (IsToFold(PN)) + ToFold = &PN; + else if (IsToHelpFold(PN)) + ToHelpFold = &PN; + } + + LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs() + << "\nFound loop that can fold terminal condition\n" + << " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n" + << " TermCond: " << *TermCond << "\n" + << " BranchInst: " << *BI << "\n" + << " ToFold: " << *ToFold << "\n" + << " ToHelpFold: " << *ToHelpFold << "\n"); + + return {ToFold != nullptr && ToHelpFold != nullptr, {ToFold, ToHelpFold}}; +} + static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, @@ -6630,6 +6749,93 @@ } } + auto CanFoldTerminalCondition = canFoldTermCondOfLoop(L, SE, DT, LI); + if (CanFoldTerminalCondition.first == true) { + Changed = true; + BasicBlock *LoopPreheader = L->getLoopPreheader(); + BasicBlock *LoopLatch = L->getLoopLatch(); + + PHINode *ToFold = CanFoldTerminalCondition.second.first; + PHINode *ToHelpFold = CanFoldTerminalCondition.second.second; + + LLVM_DEBUG(dbgs() << "To fold phi-node:\n" + << *ToFold << "\n" + << "New term-cond phi-node:\n" + << *ToHelpFold << "\n"); + + Value *StartValue = nullptr; + Value *LoopValue = nullptr; + + for (Use &U : ToHelpFold->operands()) { + if (ToHelpFold->getIncomingBlock(U) == LoopPreheader) + StartValue = cast<Value>(&U); + else + LoopValue = cast<Value>(&U); + } + + // SCEVExpander for both use in preheader and latch + const DataLayout &DL =
L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); + SCEVExpanderCleaner ExpCleaner(Expander); + + // Create new terminal value in the loop preheader + GetElementPtrInst *StartValueGEP = cast<GetElementPtrInst>(StartValue); + Type *PtrTy = StartValueGEP->getPointerOperand()->getType(); + + const SCEV *BECount = SE.getBackedgeTakenCount(L); + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(SE.getSCEV(ToHelpFold)); + + // TermValue = Start + Stride * (BackedgeCount + 1) + const SCEV *TermValueS = SE.getAddExpr( + AddRec->getOperand(0), + SE.getTruncateOrZeroExtend( + SE.getMulExpr( + AddRec->getOperand(1), + SE.getTruncateOrZeroExtend( + SE.getAddExpr(BECount, SE.getOne(BECount->getType())), + AddRec->getOperand(1)->getType())), + AddRec->getOperand(0)->getType())); + + // NOTE: If this is triggered, we should predicate to check for safety + if (!Expander.isSafeToExpand(TermValueS)) { + LLVMContext &Ctx = L->getHeader()->getContext(); + Ctx.emitError( + "Terminal value is not safe to expand, need to add it to predicate"); + } + + Value *TermValue = Expander.expandCodeFor(TermValueS, PtrTy, + LoopPreheader->getTerminator()); + + LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n" + << *StartValue << "\n" + << "Terminal value of new term-cond phi-node:\n" + << *TermValue << "\n"); + + // Create new terminal condition at loop latch + BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator()); + ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition()); + IRBuilder<> LatchBuilder(LoopLatch->getTerminator()); + Value *NewTermCond = LatchBuilder.CreateICmp( + OldTermCond->getPredicate(), LoopValue, TermValue, + "lsr_fold_term_cond.replaced_term_cond"); + + LLVM_DEBUG(dbgs() << "Old term-cond:\n" + << *OldTermCond << "\n" + << "New term-cond:\n" << *NewTermCond << "\n"); + + BI->setCondition(NewTermCond); + + OldTermCond->replaceAllUsesWith(PoisonValue::get(OldTermCond->getType())); + OldTermCond->eraseFromParent(); + + // Cleanup the old terminal condition that is no
longer used + // Clear the PHINode and DCE will do the rest... + while (ToFold->getNumIncomingValues()) + ToFold->removeIncomingValue(0u); + + ExpCleaner.markResultUsed(); + } + if (SalvageableDVIRecords.empty()) return Changed; diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code (extracted from 400.perlbench) +; #include +; int *sv_2mortal(int*); +; void Perl_pp_slice(int **mark, int length) { +; int **dst = mark + 3; +; for (int i = length; i; i--) { +; sv_2mortal(*dst); /* free them eventualy */ +; dst++; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-p:32:32:32-n32" + +define void @Perl_pp_slice(ptr readonly %mark, i32 signext %length) { +; CHECK-LABEL: @Perl_pp_slice( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds ptr, ptr [[MARK:%.*]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[LENGTH]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 12 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK]], i32 [[TMP1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[ADD_PTR]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call 
ptr @sv_2mortal(ptr [[TMP2]]) +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 +; CHECK-NEXT: [[DEC:%.*]] = add i32 poison, -1 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; +entry: + %tobool.not3 = icmp eq i32 %length, 0 + br i1 %tobool.not3, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %add.ptr = getelementptr inbounds ptr, ptr %mark, i64 3 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body, %for.body.preheader + %i.05 = phi i32 [ %dec, %for.body ], [ %length, %for.body.preheader ] + %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %add.ptr, %for.body.preheader ] + %0 = load ptr, ptr %dst.04, align 8 + %call = tail call ptr @sv_2mortal(ptr %0) + %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1 + %dec = add nsw i32 %i.05, -1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %for.cond.cleanup.loopexit, label %for.body +} + +declare ptr @sv_2mortal(ptr) diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code (extracted from 445.gobmk) +; typedef unsigned char Intersection; +; extern Intersection board[421]; +; void close_bubbles(int gb[400], int bubbles[400]) { +; int ii; +; for (ii = 21; ii < 400; ii++) { +; if (!(board[ii] != 3) || gb[ii]) +; continue; +; if (bubbles[ii] == 1) +; 
gb[ii] = 1; +; if (bubbles[ii] == 2) +; gb[ii] = -1; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-p:32:32:32-n32" + +@board = external local_unnamed_addr global [421 x i8], align 8 + +define void @close_bubbles(ptr nocapture noundef %gb, ptr nocapture noundef readonly %bubbles) { +; CHECK-LABEL: @close_bubbles( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[BUBBLES:%.*]], i64 84 +; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[GB:%.*]], i64 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[BUBBLES]], i32 1600 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV6:%.*]] = phi ptr [ [[UGLYGEP7:%.*]], [[FOR_INC:%.*]] ], [ getelementptr (i8, ptr @board, i64 21), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[UGLYGEP5:%.*]], [[FOR_INC]] ], [ [[UGLYGEP3]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_INC]] ], [ [[UGLYGEP]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[LSR_IV6]], align 1 +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i8 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[CMP1_NOT]], label [[FOR_INC]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[FOR_INC]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TMP2]], 1 +; CHECK-NEXT: br i1 [[CMP7]], label [[IF_THEN9:%.*]], label [[IF_END12:%.*]] +; CHECK: if.then9: +; CHECK-NEXT: store i32 1, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: if.end12: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPR]], [[IF_THEN9]] ], [ [[TMP2]], [[IF_END]] ] +; CHECK-NEXT: [[CMP15:%.*]] = icmp eq i32 
[[TMP3]], 2 +; CHECK-NEXT: br i1 [[CMP15]], label [[IF_THEN17:%.*]], label [[FOR_INC]] +; CHECK: if.then17: +; CHECK-NEXT: store i32 -1, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nsw i64 poison, -1 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4 +; CHECK-NEXT: [[UGLYGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i64 1 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %bubbles, i64 84 + %uglygep3 = getelementptr i8, ptr %gb, i64 84 + br label %for.body + +for.body: ; preds = %for.inc, %entry + %lsr.iv6 = phi ptr [ %uglygep7, %for.inc ], [ getelementptr (i8, ptr @board, i64 21), %entry ] + %lsr.iv4 = phi ptr [ %uglygep5, %for.inc ], [ %uglygep3, %entry ] + %lsr.iv1 = phi ptr [ %uglygep2, %for.inc ], [ %uglygep, %entry ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.inc ], [ 379, %entry ] + %0 = load i8, ptr %lsr.iv6, align 1 + %cmp1.not = icmp eq i8 %0, 3 + br i1 %cmp1.not, label %for.inc, label %lor.lhs.false + +lor.lhs.false: ; preds = %for.body + %1 = load i32, ptr %lsr.iv4, align 4 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %if.end, label %for.inc + +if.end: ; preds = %lor.lhs.false + %2 = load i32, ptr %lsr.iv1, align 4 + %cmp7 = icmp eq i32 %2, 1 + br i1 %cmp7, label %if.then9, label %if.end12 + +if.then9: ; preds = %if.end + store i32 1, ptr %lsr.iv4, align 4 + %.pr = load i32, ptr %lsr.iv1, align 4 + br label %if.end12 + +if.end12: ; preds = %if.then9, %if.end + %3 = phi i32 [ %.pr, %if.then9 ], [ %2, %if.end ] + %cmp15 = icmp eq i32 %3, 2 + br i1 %cmp15, label %if.then17, label %for.inc + +if.then17: ; preds = %if.end12 + store i32 -1, 
ptr %lsr.iv4, align 4 + br label %for.inc + +for.inc: ; preds = %if.then17, %if.end12, %lor.lhs.false, %for.body + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 + %uglygep5 = getelementptr i8, ptr %lsr.iv4, i64 4 + %uglygep7 = getelementptr i8, ptr %lsr.iv6, i64 1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret void +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll @@ -0,0 +1,131 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code (extracted from 445.gobmk) +; typedef unsigned char Intersection; +; extern Intersection board[421]; +; void close_bubbles(int gb[400], int bubbles[400]) { +; int ii; +; for (ii = 21; ii < 400; ii++) { +; if (!(board[ii] != 3) || gb[ii]) +; continue; +; if (bubbles[ii] == 1) +; gb[ii] = 1; +; if (bubbles[ii] == 2) +; gb[ii] = -1; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-p:32:32:32-n32" + +@Alphabet_iupac = global i32 0, align 4 +@Alphabet = global [25 x i8] zeroinitializer, align 8 + +define ptr @DigitizeSequence(ptr readonly %seq, i32 signext %L) { +; CHECK-LABEL: @DigitizeSequence( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[CONV]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @Alphabet_iupac, align 4 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[L]], 1 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD2]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i8, ptr [[CALL]], i64 [[IDXPROM]] +; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: store i8 [[CONV1]], ptr [[CALL]], align 1 +; CHECK-NEXT: [[CMP_NOT31:%.*]] = icmp slt i32 [[L]], 1 +; CHECK-NEXT: br i1 [[CMP_NOT31]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CALL5:%.*]] = tail call ptr @__ctype_toupper_loc() +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[CALL5]], align 8 +; CHECK-NEXT: [[SUB16:%.*]] = add i32 [[TMP0]], 255 +; CHECK-NEXT: [[CONV1730:%.*]] = zext i32 [[SUB16]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[ADD2]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 1 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[CALL]], i32 [[ADD2]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[UGLYGEP4:%.*]], [[FOR_BODY]] ], [ [[SEQ:%.*]], [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[LSR_IV3]], align 1 +; CHECK-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP3]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 +; CHECK-NEXT: [[CONV12:%.*]] = and i32 [[TMP4]], 255 +; CHECK-NEXT: [[CALL13:%.*]] = tail call ptr @strchr(ptr @Alphabet, i32 signext [[CONV12]]) +; CHECK-NEXT: [[CMP14:%.*]] = icmp eq ptr [[CALL13]], null +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[CALL13]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], ptrtoint (ptr @Alphabet to i64) +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP14]], i64 [[CONV1730]], i64 [[SUB_PTR_SUB]] +; CHECK-NEXT: [[CONV18:%.*]] = trunc i64 [[COND]] to i8 +; CHECK-NEXT: store i8 [[CONV18]], ptr 
[[LSR_IV1]], align 1 +; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nsw i64 poison, -1 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 1 +; CHECK-NEXT: [[UGLYGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 1 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %add = add nsw i32 %L, 2 + %conv = sext i32 %add to i64 + %call = tail call noalias ptr @malloc(i64 %conv) + %0 = load i32, ptr @Alphabet_iupac, align 4 + %conv1 = trunc i32 %0 to i8 + %add2 = add i32 %L, 1 + %idxprom = sext i32 %add2 to i64 + %arrayidx = getelementptr inbounds i8, ptr %call, i64 %idxprom + store i8 %conv1, ptr %arrayidx, align 1 + store i8 %conv1, ptr %call, align 1 + %cmp.not31 = icmp slt i32 %L, 1 + br i1 %cmp.not31, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %call5 = tail call ptr @__ctype_toupper_loc() + %1 = load ptr, ptr %call5, align 8 + %sub16 = add i32 %0, 255 + %conv1730 = zext i32 %sub16 to i64 + %wide.trip.count = zext i32 %add2 to i64 + %2 = add nsw i64 %wide.trip.count, -1 + %uglygep = getelementptr i8, ptr %call, i64 1 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %lsr.iv3 = phi ptr [ %uglygep4, %for.body ], [ %seq, %for.body.lr.ph ] + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %for.body.lr.ph ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ %2, %for.body.lr.ph ] + %3 = load i8, ptr %lsr.iv3, align 1 + %idxprom9 = zext i8 %3 to i64 + %arrayidx10 = getelementptr inbounds i32, ptr %1, i64 %idxprom9 + %4 = load i32, ptr %arrayidx10, align 4 + %conv12 = and i32 %4, 255 + %call13 = tail call ptr @strchr(ptr @Alphabet, i32 signext %conv12) + %cmp14 = icmp eq ptr %call13, null + %sub.ptr.lhs.cast = 
ptrtoint ptr %call13 to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint (ptr @Alphabet to i64) + %cond = select i1 %cmp14, i64 %conv1730, i64 %sub.ptr.sub + %conv18 = trunc i64 %cond to i8 + store i8 %conv18, ptr %lsr.iv1, align 1 + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 1 + %uglygep4 = getelementptr i8, ptr %lsr.iv3, i64 1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret ptr %call +} + +declare ptr @malloc(i64) + +declare ptr @strchr(ptr, i32 signext) + +declare ptr @__ctype_toupper_loc()