diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -524,15 +524,6 @@
                       ArrayRef<VPValue *> StoredValues,
                       VPValue *BlockInMask = nullptr);
 
-  /// Vectorize Load and Store instructions with the base address given in \p
-  /// Addr, optionally masking the vector operations if \p BlockInMask is
-  /// non-null. Use \p State to translate given VPValues to IR values in the
-  /// vectorized loop.
-  void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
-                                  VPValue *Def, VPValue *Addr,
-                                  VPValue *StoredValue, VPValue *BlockInMask,
-                                  bool ConsecutiveStride, bool Reverse);
-
   /// Set the debug location in the builder \p Ptr using the debug location in
   /// \p V. If \p Ptr is None then it uses the class member's Builder.
   void setDebugLocFromInst(const Value *V,
@@ -3004,130 +2995,6 @@
   }
 }
 
-void InnerLoopVectorizer::vectorizeMemoryInstruction(
-    Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
-    VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
-    bool Reverse) {
-  // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(Instr);
-  StoreInst *SI = dyn_cast<StoreInst>(Instr);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
-
-  Type *ScalarDataTy = getLoadStoreType(Instr);
-
-  auto *DataTy = VectorType::get(ScalarDataTy, VF);
-  const Align Alignment = getLoadStoreAlignment(Instr);
-  bool CreateGatherScatter = !ConsecutiveStride;
-
-  VectorParts BlockInMaskParts(UF);
-  bool isMaskRequired = BlockInMask;
-  if (isMaskRequired)
-    for (unsigned Part = 0; Part < UF; ++Part)
-      BlockInMaskParts[Part] = State.get(BlockInMask, Part);
-
-  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
-    // Calculate the pointer for the specific unroll-part.
-    GetElementPtrInst *PartPtr = nullptr;
-
-    bool InBounds = false;
-    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-      InBounds = gep->isInBounds();
-    if (Reverse) {
-      // If the address is consecutive but reversed, then the
-      // wide store needs to start at the last vector element.
-      // RunTimeVF = VScale * VF.getKnownMinValue()
-      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-      Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF);
-      // NumElt = -Part * RunTimeVF
-      Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
-      // LastLane = 1 - RunTimeVF
-      Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
-      PartPtr =
-          cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
-      PartPtr->setIsInBounds(InBounds);
-      PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
-      PartPtr->setIsInBounds(InBounds);
-      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
-        BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
-    } else {
-      Value *Increment =
-          createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
-      PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
-      PartPtr->setIsInBounds(InBounds);
-    }
-
-    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
-    return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
-  };
-
-  // Handle Stores:
-  if (SI) {
-    setDebugLocFromInst(SI);
-
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(Addr, Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (Reverse) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = reverseVector(StoredVal);
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      addMetadata(NewSI, SI);
-    }
-    return;
-  }
-
-  // Handle loads.
-  assert(LI && "Must have a load instruction");
-  setDebugLocFromInst(LI);
-  for (unsigned Part = 0; Part < UF; ++Part) {
-    Value *NewLI;
-    if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-      Value *VectorGep = State.get(Addr, Part);
-      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
-                                         nullptr, "wide.masked.gather");
-      addMetadata(NewLI, LI);
-    } else {
-      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
-      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(
-            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
-            PoisonValue::get(DataTy), "wide.masked.load");
-      else
-        NewLI =
-            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
-
-      // Add metadata to the load, but setVectorValue to the reverse shuffle.
-      addMetadata(NewLI, LI);
-      if (Reverse)
-        NewLI = reverseVector(NewLI);
-    }
-
-    State.set(Def, NewLI, Part);
-  }
-}
-
 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
                                                const VPIteration &Instance,
@@ -10057,9 +9924,129 @@
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
   VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-  State.ILV->vectorizeMemoryInstruction(
-      &Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
-      StoredValue, getMask(), Consecutive, Reverse);
+
+  // Attempt to issue a wide load.
+  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
+  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
+
+  assert((LI || SI) && "Invalid Load/Store instruction");
+  assert((!SI || StoredValue) && "No stored value provided for widened store");
+  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+
+  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+
+  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+  bool CreateGatherScatter = !Consecutive;
+
+  auto &Builder = State.Builder;
+  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+  bool isMaskRequired = getMask();
+  if (isMaskRequired)
+    for (unsigned Part = 0; Part < State.UF; ++Part)
+      BlockInMaskParts[Part] = State.get(getMask(), Part);
+
+  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
+    // Calculate the pointer for the specific unroll-part.
+    GetElementPtrInst *PartPtr = nullptr;
+
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+    if (Reverse) {
+      // If the address is consecutive but reversed, then the
+      // wide store needs to start at the last vector element.
+      // RunTimeVF = VScale * VF.getKnownMinValue()
+      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+      Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+      // NumElt = -Part * RunTimeVF
+      Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+      // LastLane = 1 - RunTimeVF
+      Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+      PartPtr =
+          cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
+      PartPtr->setIsInBounds(InBounds);
+      PartPtr = cast<GetElementPtrInst>(
+          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
+      PartPtr->setIsInBounds(InBounds);
+      if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
+        BlockInMaskParts[Part] =
+            Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
+    } else {
+      Value *Increment =
+          createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
+      PartPtr = cast<GetElementPtrInst>(
+          Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
+      PartPtr->setIsInBounds(InBounds);
+    }
+
+    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+    return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
+  };
+
+  // Handle Stores:
+  if (SI) {
+    State.ILV->setDebugLocFromInst(SI);
+
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Instruction *NewSI = nullptr;
+      Value *StoredVal = State.get(StoredValue, Part);
+      if (CreateGatherScatter) {
+        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+        Value *VectorGep = State.get(getAddr(), Part);
+        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+                                            MaskPart);
+      } else {
+        if (Reverse) {
+          // If we store to reverse consecutive memory locations, then we need
+          // to reverse the order of elements in the stored value.
+          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+          // We don't want to update the value in the map as it might be used in
+          // another expression. So don't call resetVectorValue(StoredVal).
+        }
+        auto *VecPtr =
+            CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+        if (isMaskRequired)
+          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+                                            BlockInMaskParts[Part]);
+        else
+          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+      }
+      State.ILV->addMetadata(NewSI, SI);
+    }
+    return;
+  }
+
+  // Handle loads.
+  assert(LI && "Must have a load instruction");
+  State.ILV->setDebugLocFromInst(LI);
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *NewLI;
+    if (CreateGatherScatter) {
+      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+                                         nullptr, "wide.masked.gather");
+      State.ILV->addMetadata(NewLI, LI);
+    } else {
+      auto *VecPtr =
+          CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+      if (isMaskRequired)
+        NewLI = Builder.CreateMaskedLoad(
+            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
+            PoisonValue::get(DataTy), "wide.masked.load");
+      else
+        NewLI =
+            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+
+      // Add metadata to the load, but setVectorValue to the reverse shuffle.
+      State.ILV->addMetadata(NewLI, LI);
+      if (Reverse)
+        NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
+    }
+
+    State.set(getVPSingleValue(), NewLI, Part);
+  }
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
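Note: the snippet below is a standalone sketch, not part of the patch. It illustrates the shape of IR the new VPWidenMemoryInstructionRecipe::execute path emits for a reversed, masked wide load, using only public IRBuilder APIs (CreateGEP, CreateVectorReverse, CreateMaskedLoad), matching the Reverse branch above: the part pointer is stepped back by the runtime VF, the mask is reversed, and the loaded value is reversed again. The function name "demo", the fixed VF of 4, and the constant offsets are illustrative assumptions; it targets typed-pointer-era LLVM headers of the same vintage as this patch.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("reverse-load-demo", Ctx);
  IRBuilder<> Builder(Ctx);

  // declare <4 x i32> @demo(i32* %base, <4 x i1> %mask)  -- hypothetical.
  auto *VecTy = FixedVectorType::get(Builder.getInt32Ty(), 4);
  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), 4);
  auto *FnTy = FunctionType::get(
      VecTy, {Builder.getInt32Ty()->getPointerTo(), MaskTy}, false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
  Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  Value *Base = F->getArg(0);
  Value *Mask = F->getArg(1);

  // For a fixed VF of 4 the runtime VF is the constant 4, so for Part 0:
  // NumElt = -Part * RunTimeVF = 0 and LastLane = 1 - RunTimeVF = -3.
  Value *NumElt = Builder.getInt32(0);
  Value *LastLane = Builder.getInt32(-3);
  Value *Ptr = Builder.CreateGEP(Builder.getInt32Ty(), Base, NumElt);
  Ptr = Builder.CreateGEP(Builder.getInt32Ty(), Ptr, LastLane);
  Value *VecPtr = Builder.CreateBitCast(Ptr, VecTy->getPointerTo());

  // Reverse the mask, issue the masked load, then reverse the loaded value,
  // mirroring the Reverse path of the recipe.
  Value *RevMask = Builder.CreateVectorReverse(Mask, "reverse");
  Value *Load = Builder.CreateMaskedLoad(VecTy, VecPtr, Align(4), RevMask,
                                         PoisonValue::get(VecTy),
                                         "wide.masked.load");
  Builder.CreateRet(Builder.CreateVectorReverse(Load, "reverse"));

  verifyModule(M, &errs());
  M.print(outs(), nullptr);
}

Printing the module shows the load reading lanes base[-3..0] and the final reverse restoring lane 0 to the front, which is why a null (all-one) mask can stay null: reversing it is a no-op, as the comment in the patch notes.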