diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8093,9 +8093,9 @@ assert(!State.Instance && "Reduction being replicated."); for (unsigned Part = 0; Part < State.UF; ++Part) { RecurrenceDescriptor::RecurrenceKind Kind = RdxDesc->getRecurrenceKind(); - Value *NewVecOp = State.get(VecOp, Part); - if (CondOp) { - Value *NewCond = State.get(CondOp, Part); + Value *NewVecOp = State.get(getVecOp(), Part); + if (VPValue *Cond = getCondOp()) { + Value *NewCond = State.get(Cond, Part); VectorType *VecTy = cast(NewVecOp->getType()); Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity( Kind, RdxDesc->getMinMaxRecurrenceKind(), VecTy->getElementType()); @@ -8106,7 +8106,7 @@ } Value *NewRed = createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp, NoNaN); - Value *PrevInChain = State.get(ChainOp, Part); + Value *PrevInChain = State.get(getChainOp(), Part); Value *NextInChain; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { @@ -8115,9 +8115,10 @@ NewRed, PrevInChain); } else { NextInChain = State.Builder.CreateBinOp( - (Instruction::BinaryOps)I->getOpcode(), NewRed, PrevInChain); + (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed, + PrevInChain); } - State.ValueMap.setVectorValue(I, Part, NextInChain); + State.set(this, getUnderlyingInstr(), NextInChain, Part); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1071,17 +1071,10 @@ /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. -class VPReductionRecipe : public VPRecipeBase { +/// The Operands are {ChainOp, VecOp, [Condition]}. +class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; - /// The original instruction being converted to a reduction. - Instruction *I; - /// The VPValue of the vector value to be reduced. - VPValue *VecOp; - /// The VPValue of the scalar Chain being accumulated. - VPValue *ChainOp; - /// The VPValue of the condition for the block. - VPValue *CondOp; /// Fast math flags to use for the resulting reduction operation. bool NoNaN; /// Pointer to the TTI, needed to create the target reduction @@ -1091,12 +1084,19 @@ VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool NoNaN, const TargetTransformInfo *TTI) - : VPRecipeBase(VPReductionSC), RdxDesc(R), I(I), VecOp(VecOp), - ChainOp(ChainOp), CondOp(CondOp), NoNaN(NoNaN), TTI(TTI) {} + : VPRecipeBase(VPRecipeBase::VPReductionSC), + VPValue(VPValue::VPReductionSC, I), VPUser({ChainOp, VecOp}), + RdxDesc(R), NoNaN(NoNaN), TTI(TTI) { + if (CondOp) + addOperand(CondOp); + } ~VPReductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPReductionSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPReductionSC; } @@ -1107,6 +1107,15 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + /// The VPValue of the scalar Chain being accumulated. + VPValue *getChainOp() const { return getOperand(0); } + /// The VPValue of the vector value to be reduced. + VPValue *getVecOp() const { return getOperand(1); } + /// The VPValue of the condition for the block. + VPValue *getCondOp() const { + return getNumOperands() > 2 ? getOperand(2) : nullptr; + } }; /// VPReplicateRecipe replicates a given instruction producing multiple scalar diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -110,12 +111,16 @@ return U; if (auto *U = dyn_cast(this)) return U; + if (auto *U = dyn_cast(this)) + return U; return nullptr; } VPValue *VPRecipeBase::toVPValue() { if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; if (auto *V = dyn_cast(this)) return V; if (auto *V = dyn_cast(this)) @@ -130,6 +135,8 @@ const VPValue *VPRecipeBase::toVPValue() const { if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; if (auto *V = dyn_cast(this)) return V; if (auto *V = dyn_cast(this)) @@ -932,13 +939,16 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"REDUCE of" << *I << " as "; - ChainOp->printAsOperand(O, SlotTracker); - O << " + reduce("; - VecOp->printAsOperand(O, SlotTracker); - if (CondOp) { + O << "\"REDUCE "; + printAsOperand(O, SlotTracker); + O << " = "; + getChainOp()->printAsOperand(O, SlotTracker); + O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getRecurrenceBinOp()) + << " ("; + getVecOp()->printAsOperand(O, SlotTracker); + if (getCondOp()) { O << ", "; - CondOp->printAsOperand(O, SlotTracker); + getCondOp()->printAsOperand(O, SlotTracker); } O << ")"; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -89,9 +89,10 @@ VPValueSC, VPInstructionSC, VPMemoryInstructionSC, + VPReductionSC, VPVWidenCallSC, + VPVWidenGEPSC, VPVWidenSelectSC, - VPVWidenGEPSC }; VPValue(Value *UV = nullptr, VPDef *Def = nullptr) diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -70,4 +70,31 @@ ret void } +define float @print_reduction(i64 %n, float* noalias %y) { +; CHECK: N0 [label = +; CHECK-NEXT: "for.body:\n" + +; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi %iv.next, 0\l" + +; CHECK-NEXT: "WIDEN-PHI %red = phi %red.next, 0.000000e+00\l" + +; CHECK-NEXT: "CLONE %arrayidx = getelementptr %y, %iv\l" + +; CHECK-NEXT: "WIDEN ir<%lv> = load ir<%arrayidx>\l" + +; CHECK-NEXT: "REDUCE ir<%red.next> = ir<%red> + reduce.fadd (ir<%lv>)\l" +; CHECK-NEXT: ] + +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %red = phi float [ %red.next, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %arrayidx, align 4 + %red.next = fadd fast float %lv, %red + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret float %red.next +} + declare float @llvm.sqrt.f32(float) nounwind readnone