diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -441,9 +441,11 @@ /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, - /// inclusive.. - void scalarizeInstruction(Instruction *Instr, const VPIteration &Instance, - bool IfPredicateInstr); + /// inclusive. Uses the VPValue operands from \p Operands instead of \p + /// Instr's operands. + void scalarizeInstruction(Instruction *Instr, VPUser &Operands, + const VPIteration &Instance, bool IfPredicateInstr, + VPTransformState &State); /// Widen an integer or floating-point induction variable \p IV. If \p Trunc /// is provided, the integer induction variable will first be truncated to @@ -2496,9 +2498,10 @@ } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User, const VPIteration &Instance, - bool IfPredicateInstr) { + bool IfPredicateInstr, + VPTransformState &State) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); setDebugLocFromInst(Builder, Instr); @@ -2512,8 +2515,8 @@ // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. 
- for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { - auto *NewOp = getOrCreateScalarValue(Instr->getOperand(op), Instance); + for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) { + auto *NewOp = State.get(User.getOperand(op), Instance); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); @@ -7045,7 +7048,8 @@ bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range); - auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); + auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), + IsUniform, IsPredicated); setRecipe(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar @@ -7498,7 +7502,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(Ingredient, *State.Instance, IsPredicated); + State.ILV->scalarizeInstruction(Ingredient, User, *State.Instance, + IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF > 1) { // If we're constructing lane 0, initialize to start from undef. @@ -7518,7 +7523,8 @@ unsigned EndLane = IsUniform ? 1 : State.VF; for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(Ingredient, {Part, Lane}, IsPredicated); + State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane}, + IsPredicated, State); } void VPBranchOnMaskRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1021,6 +1021,9 @@ /// The instruction being replicated. Instruction *Ingredient; + /// Hold VPValues for the operands of the ingredient. 
+ VPUser User; + /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1031,9 +1034,11 @@ bool AlsoPack; public: - VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform), - IsPredicated(IsPredicated) { + template <typename IterT> + VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands, + bool IsUniform, bool IsPredicated = false) + : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands), + IsUniform(IsUniform), IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the // predicated basic block iff it was its only user. This is achieved by