Index: llvm/include/llvm/Analysis/IVDescriptors.h =================================================================== --- llvm/include/llvm/Analysis/IVDescriptors.h +++ llvm/include/llvm/Analysis/IVDescriptors.h @@ -77,11 +77,13 @@ RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, - SmallPtrSetImpl &CI) + SmallPtrSetImpl &CIFromRecurTy, + SmallPtrSetImpl &CIToRecurTy) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), IsOrdered(Ordered) { - CastInsts.insert(CI.begin(), CI.end()); + CastsFromRecurrenceType.insert(CIFromRecurTy.begin(), CIFromRecurTy.end()); + CastsToRecurrenceType.insert(CIToRecurTy.begin(), CIToRecurTy.end()); } /// This POD struct holds information about a potential recurrence operation. @@ -249,7 +251,15 @@ /// Returns a reference to the instructions used for type-promoting the /// recurrence. - const SmallPtrSet &getCastInsts() const { return CastInsts; } + const SmallPtrSet &getCastsFromRecurrenceType() const { + return CastsFromRecurrenceType; + } + + /// Returns a reference to the instructions used to cast operands of the + /// recurrence to the recurrence type. + const SmallPtrSet &getCastsToRecurrenceType() const { + return CastsToRecurrenceType; + } /// Returns true if all source operands of the recurrence are SExtInsts. bool isSigned() const { return IsSigned; } @@ -290,7 +300,10 @@ // if it is also the only FAdd in the PHI's use chain. bool IsOrdered = false; // Instructions used for type-promoting the recurrence. - SmallPtrSet CastInsts; + SmallPtrSet CastsFromRecurrenceType; + // Instructions used to cast operands of the recurrence to the recurrence + // type. + SmallPtrSet CastsToRecurrenceType; }; /// A struct for saving information about induction variables. Index: llvm/lib/Analysis/IVDescriptors.cpp =================================================================== --- llvm/lib/Analysis/IVDescriptors.cpp +++ llvm/lib/Analysis/IVDescriptors.cpp @@ -159,12 +159,12 @@ IsSigned); } -/// Collect cast instructions that can be ignored in the vectorizer's cost -/// model, given a reduction exit value and the minimal type in which the -/// reduction can be represented. -static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, - Type *RecurrenceType, - SmallPtrSetImpl &Casts) { +/// Collect cast instructions given a reduction exit value and the minimal +/// type in which the reduction can be represented. +static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl &CastsFromRecurTy, + SmallPtrSetImpl &CastsToRecurTy) { SmallVector Worklist; SmallPtrSet Visited; @@ -173,15 +173,22 @@ while (!Worklist.empty()) { Instruction *Val = Worklist.pop_back_val(); Visited.insert(Val); - if (auto *Cast = dyn_cast(Val)) + if (auto *Cast = dyn_cast(Val)) { if (Cast->getSrcTy() == RecurrenceType) { // If the source type of a cast instruction is equal to the recurrence - // type, it will be eliminated, and should be ignored in the vectorizer - // cost model. - Casts.insert(Cast); + // type, it will be eliminated, and should be ignored in the + // vectorizer cost model. + CastsFromRecurTy.insert(Cast); continue; } - + if (Cast->getDestTy() == RecurrenceType) { + // Add casts with destination type equal to the recurrence type. These + // are checked by the vectorizer when finding the widest type for + // in-loop reductions without any loads/stores. + CastsToRecurTy.insert(Cast); + continue; + } + } // Add all operands to the work list if they are loop-varying values that // we haven't yet visited. for (Value *O : cast(Val)->operands()) @@ -264,7 +271,7 @@ // Data used for determining if the recurrence has been type-promoted. Type *RecurrenceType = Phi->getType(); - SmallPtrSet CastInsts; + SmallPtrSet CastsFromRecurTy; Instruction *Start = Phi; bool IsSigned = false; @@ -283,7 +290,8 @@ if (!isIntegerRecurrenceKind(Kind)) return false; if (!isMinMaxRecurrenceKind(Kind)) - Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + Start = + lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastsFromRecurTy); } else { // Pointer min/max may exist, but it is not supported as a reduction op. return false; @@ -470,6 +478,8 @@ const bool IsOrdered = checkOrderedReduction( Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi); + SmallPtrSet CastsToRecurTy; + if (Start != Phi) { // If the starting value is not the same as the phi node, we speculatively // looked through an 'and' instruction when evaluating a potential @@ -500,21 +510,24 @@ computeRecurrenceType(ExitInstruction, DB, AC, DT); if (ComputedType != RecurrenceType) return false; - - // The recurrence expression will be represented in a narrower type. If - // there are any cast instructions that will be unnecessary, collect them - // in CastInsts. Note that the 'and' instruction was already included in - // this list. - // - // TODO: A better way to represent this may be to tag in some way all the - // instructions that are a part of the reduction. The vectorizer cost - // model could then apply the recurrence type to these instructions, - // without needing a white list of instructions to ignore. - // This may also be useful for the inloop reductions, if it can be - // kept simple enough. - collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); } + // Collect cast instructions. + // If the starting value is not the same as the phi node and the computed + // recurrence type is equal to the recurrence type, the recurrence expression + // will be represented in a narrower type. If there are any cast instructions + // that will be unnecessary, collect them in CastsFromRecurTy. Note that the + // 'and' instruction was already included in this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. + // This may also be useful for the inloop reductions, if it can be + // kept simple enough. + collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastsFromRecurTy, + CastsToRecurTy); + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. @@ -522,9 +535,9 @@ // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. - RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, - ReduxDesc.getExactFPMathInst(), RecurrenceType, - IsSigned, IsOrdered, CastInsts); + RecurrenceDescriptor RD( + RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getExactFPMathInst(), + RecurrenceType, IsSigned, IsOrdered, CastsFromRecurTy, CastsToRecurTy); RedDes = RD; return true; Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5975,11 +5975,35 @@ unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); - for (Type *T : ElementTypesInLoop) { - MinWidth = std::min( - MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); - MaxWidth = std::max( - MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + // For in-loop reductions, no element types are added to ElementTypesInLoop + // if there are no loads/stores in the loop. In this case, check through the + // reduction variables to determine the maximum width. + if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) { + // Reset MaxWidth so that we can find the smallest type used by recurrences + // in the loop. + MaxWidth = -1U; + for (auto &PhiDescriptorPair : Legal->getReductionVars()) { + const RecurrenceDescriptor &RdxDesc = PhiDescriptorPair.second; + MaxWidth = std::min( + MaxWidth, + DL.getTypeSizeInBits(RdxDesc.getRecurrenceType()->getScalarType()) + .getFixedSize()); + // We also need to check the cast instructions in the loop as there may be + // extends on the input operands of the recurrence. + for (const Instruction *I : RdxDesc.getCastsToRecurrenceType()) { + MaxWidth = std::min( + MaxWidth, + DL.getTypeSizeInBits(cast(I)->getSrcTy()->getScalarType()) + .getFixedSize()); + } + } + } else { + for (Type *T : ElementTypesInLoop) { + MinWidth = std::min( + MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + MaxWidth = std::max( + MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize()); + } } return {MinWidth, MaxWidth}; } @@ -7725,7 +7749,8 @@ // detection. for (auto &Reduction : Legal->getReductionVars()) { const RecurrenceDescriptor &RedDes = Reduction.second; - const SmallPtrSetImpl &Casts = RedDes.getCastInsts(); + const SmallPtrSetImpl &Casts = + RedDes.getCastsFromRecurrenceType(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } // Ignore type-casting instructions we identified during induction Index: llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-target-instruction-cost=1 -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @@ -31,3 +31,31 @@ for.end: ret void } + +; For in-loop reductions with no loads or stores in the loop the widest type is +; determined by looking through the recurrences, which allows a sensible VF to be +; chosen. + +; CHECK-LABEL: Checking a loop in "no_loads_stores" +; CHECK: The Smallest and Widest types: 4294967295 / 16 bits +; CHECK: Selecting VF: 8 + +define double @no_loads_stores() { +entry: + br label %for.body + +for.body: + %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] + %i.08 = phi i16 [ 0, %entry ], [ %inc, %for.body ] + %conv = sitofp i16 %i.08 to double + %mul = fmul double %conv, %conv + %add = fadd double %s.09, %mul + %inc = add nuw nsw i16 %i.08, 1 + %exitcond.not = icmp eq i16 %inc, 12345 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + %.lcssa = phi double [ %add, %for.body ] + ret double %.lcssa +} + Index: llvm/test/Transforms/LoopVectorize/X86/funclet.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -33,7 +33,7 @@ ; CHECK-LABEL: define void @test1( ; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null] -; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] +; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)