Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -87,6 +87,10 @@ : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence), MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr) {} + RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), + MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr) {} + RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, MinMaxRecurrenceKind MK, Instruction *UAI /*Unsafe Algebra Inst*/) @@ -164,6 +168,12 @@ bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes); + /// Return true and setup the recurrence descriptor if Phi nodes is a simple + /// floating-point recurrence of the form r = r + c, where c is a constant + /// floa ting-point step. + static bool isBasicRecurrence(PHINode *Phi, Loop *TheLoop, + RecurrenceDescriptor &RedDes); + /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is /// returned in RedDes. static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -315,6 +315,99 @@ return false; } + +bool RecurrenceDescriptor::isBasicRecurrence(PHINode *Phi, Loop *TheLoop, + RecurrenceDescriptor &RedDes) { + // This method identifies a specific type of simple floating-point recurrences + // that are not reductions. Such recurrences are not handled by induction + // variable simplification because they are floating-point and scalar + // evolution doesn't handle floating-point types. + + // A simple basic recurrence is of the form (r = r + c); Where c is a loop + // invariant. + + // Phi node must have two inputs. + if (Phi->getNumIncomingValues() != 2) + return false; + + // Recurrence variables is found in the loop header. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Recurrence is a floating-point type. + if (!Phi->getType()->isFloatingPointTy()) + return false; + + // Phi is only used by one instruction. The instruction is used in the loop + // but outside the loop. That would result in another use of the phi. + if (!Phi->hasOneUse()) + return false; + + User *U = *Phi->user_begin(); + + // Use must be a binary operator. + if (!isa(U)) + return false; + + BinaryOperator *Op = dyn_cast(U); + + // Verify that the user of the phi is also one of its 2 operands. + if (std::find(Phi->op_begin(), Phi->op_end(), Op) == Phi->op_end()) + return false; + + RecurrenceKind RecKind; + // Operator must be an FAdd, FSub, FMul, or FDiv instruction. + if (Op->getOpcode() == Instruction::FAdd || + Op->getOpcode() == Instruction::FSub) + RecKind = RK_FloatAdd; + else if (Op->getOpcode() == Instruction::FMul || + Op->getOpcode() == Instruction::FDiv) + RecKind = RK_FloatMult; + else + return false; + + // For FSub and FDiv the Phi node must be in the left-hand (first) operand. + bool FirstOperandOnly = Op->getOpcode() == Instruction::FSub || + Op->getOpcode() == Instruction::FDiv; + + // Select the step operand, and double-check the Phi is the other operand. + Value *Step; + if (Op->getOperand(0) == cast(Phi)) + Step = Op->getOperand(1); + else if (Op->getOperand(1) == cast(Phi) && !FirstOperandOnly) + Step = Op->getOperand(0); + else + return false; + + // We want to know Step is loop invariant, SCEV doesn't work for floats so + // instead we check if it is constant or an instruction that is not in the + // loop. + if (!isa(Step) && + !(isa(Step) || (isa(Step) && + !TheLoop->contains(dyn_cast(Step))))) + return false; + + // Verify the uses of the Op is in the loop. This tells us the recurrence is + // not a reduction. + for (User *OpUser : Op->users()) { + if (isa(OpUser) && + !TheLoop->contains(dyn_cast(OpUser))) + return false; + } + + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); + + // Verify we have a start value for this recurrence. + if (!RdxStart) + return false; + + // Found a simple basic recurrence! + Instruction *LoopExitInst = dyn_cast(Op); + RedDes = RecurrenceDescriptor(RdxStart, LoopExitInst, RecKind); + + return true; +} + bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes) { @@ -325,6 +418,11 @@ HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; + if (isBasicRecurrence(Phi, TheLoop, RedDes)) { + DEBUG(dbgs() << "Found a basic recurrence PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); return true; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3244,6 +3244,7 @@ // to do it in the vector-loop preheader. Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); + assert(LoopExitInst && "LoopExitInst should not be null.\n"); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(LoopExitInst); Type *VecTy = VectorExit[0]->getType(); Index: test/Transforms/LoopVectorize/X86/recurrence.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/X86/recurrence.ll @@ -0,0 +1,136 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -force-vector-width=4 -force-vector-interleave=1 -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: recurrence.c:11:4: vectorized loop (vectorization width: 4, interleaved count: 1) +; CHECK: remark: recurrence.c:31:4: vectorized loop (vectorization width: 4, interleaved count: 1) +; CHECK: remark: recurrence.c:50:4: vectorized loop (vectorization width: 4, interleaved count: 1) +; CHECK: remark: recurrence.c:69:4: vectorized loop (vectorization width: 4, interleaved count: 1) + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind readnone ssp uwtable +define double @add_recurrence() #0 { +entry: + br label %for.body, !dbg !3 + +for.cond.cleanup: ; preds = %for.body + ret double %add2, !dbg !8 + +for.body: ; preds = %for.body, %entry + %s.017 = phi double [ 0.000000e+00, %entry ], [ %add2, %for.body ] + %i.016 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %v.015 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %add = fadd double %v.015, 1.000000e+00, !dbg !9 + %mul = fmul double %add, 3.200000e-08, !dbg !10 + %add2 = fadd double %s.017, %mul, !dbg !11 + %inc = add nuw nsw i32 %i.016, 1, !dbg !12 + %exitcond = icmp eq i32 %inc, 31250000, !dbg !3 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !3, !llvm.loop !13 +} + +; Function Attrs: nounwind readnone ssp uwtable +define double @sub_recurrence() #0 { +entry: + br label %for.body, !dbg !15 + +for.cond.cleanup: ; preds = %for.body + ret double %add16, !dbg !17 + +for.body: ; preds = %for.body, %entry + %s.019 = phi double [ 0.000000e+00, %entry ], [ %add16, %for.body ] + %i.018 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %v.017 = phi double [ 0.000000e+00, %entry ], [ %sub3, %for.body ] + %sub3 = fadd double %v.017, -1.000000e+00, !dbg !18 + %mul = fmul double %sub3, 3.200000e-08, !dbg !19 + %add16 = fsub double %s.019, %mul, !dbg !19 + %inc = add nuw nsw i32 %i.018, 1, !dbg !20 + %exitcond = icmp eq i32 %inc, 31250000, !dbg !15 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !15, !llvm.loop !21 +} + +; Function Attrs: nounwind readnone ssp uwtable +define double @mul_recurrence(double %scalar) #0 { +entry: + %div = fdiv double %scalar, 3.125000e+07, !dbg !22 + br label %for.body, !dbg !24 + +for.cond.cleanup: ; preds = %for.body + ret double %add, !dbg !25 + +for.body: ; preds = %for.body, %entry + %s.016 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.015 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %v.014 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %mul = fmul double %v.014, %scalar, !dbg !26 + %mul2 = fmul double %div, %mul, !dbg !27 + %add = fadd double %s.016, %mul2, !dbg !28 + %inc = add nuw nsw i32 %i.015, 1, !dbg !29 + %exitcond = icmp eq i32 %inc, 31250000, !dbg !24 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !24, !llvm.loop !30 +} + +; Function Attrs: nounwind readnone ssp uwtable +define double @div_recurrence(double %scalar) #0 { +entry: + %div = fdiv double %scalar, 3.125000e+07, !dbg !31 + br label %for.body, !dbg !33 + +for.cond.cleanup: ; preds = %for.body + ret double %add, !dbg !34 + +for.body: ; preds = %for.body, %entry + %s.016 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.015 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %v.014 = phi double [ 1.000000e+04, %entry ], [ %div2, %for.body ] + %div2 = fdiv double %v.014, %scalar, !dbg !35 + %mul = fmul double %div, %div2, !dbg !36 + %add = fadd double %s.016, %mul, !dbg !37 + %inc = add nuw nsw i32 %i.015, 1, !dbg !38 + %exitcond = icmp eq i32 %inc, 31250000, !dbg !33 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !33, !llvm.loop !39 +} + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{i32 1, !"PIC Level", i32 2} +!2 = !{!"clang version 3.8.0"} +!3 = !DILocation(line: 11, column: 4, scope: !4) +!4 = !DISubprogram(name: "add_recurrence", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, function: double ()* @add_recurrence, variables: !7) +!5 = !DIFile(filename: "recurrence.c", directory: "") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 18, column: 4, scope: !4) +!9 = !DILocation(line: 13, column: 12, scope: !4) +!10 = !DILocation(line: 14, column: 19, scope: !4) +!11 = !DILocation(line: 15, column: 12, scope: !4) +!12 = !DILocation(line: 11, column: 32, scope: !4) +!13 = distinct !{!13, !14} +!14 = !{!"llvm.loop.unroll.disable"} +!15 = !DILocation(line: 31, column: 4, scope: !16) +!16 = !DISubprogram(name: "sub_recurrence", scope: !5, file: !5, line: 21, type: !6, isLocal: false, isDefinition: true, scopeLine: 22, isOptimized: true, function: double ()* @sub_recurrence, variables: !7) +!17 = !DILocation(line: 38, column: 4, scope: !16) +!18 = !DILocation(line: 33, column: 12, scope: !16) +!19 = !DILocation(line: 34, column: 19, scope: !16) +!20 = !DILocation(line: 31, column: 32, scope: !16) +!21 = distinct !{!21, !14} +!22 = !DILocation(line: 45, column: 22, scope: !23) +!23 = !DISubprogram(name: "mul_recurrence", scope: !5, file: !5, line: 41, type: !6, isLocal: false, isDefinition: true, scopeLine: 42, flags: DIFlagPrototyped, isOptimized: true, function: double (double)* @mul_recurrence, variables: !7) +!24 = !DILocation(line: 50, column: 4, scope: !23) +!25 = !DILocation(line: 57, column: 4, scope: !23) +!26 = !DILocation(line: 52, column: 12, scope: !23) +!27 = !DILocation(line: 53, column: 19, scope: !23) +!28 = !DILocation(line: 54, column: 12, scope: !23) +!29 = !DILocation(line: 50, column: 32, scope: !23) +!30 = distinct !{!30, !14} +!31 = !DILocation(line: 64, column: 22, scope: !32) +!32 = !DISubprogram(name: "div_recurrence", scope: !5, file: !5, line: 60, type: !6, isLocal: false, isDefinition: true, scopeLine: 61, flags: DIFlagPrototyped, isOptimized: true, function: double (double)* @div_recurrence, variables: !7) +!33 = !DILocation(line: 69, column: 4, scope: !32) +!34 = !DILocation(line: 76, column: 4, scope: !32) +!35 = !DILocation(line: 71, column: 12, scope: !32) +!36 = !DILocation(line: 72, column: 19, scope: !32) +!37 = !DILocation(line: 73, column: 12, scope: !32) +!38 = !DILocation(line: 69, column: 32, scope: !32) +!39 = distinct !{!39, !14}