diff --git a/polly/docs/ReleaseNotes.rst b/polly/docs/ReleaseNotes.rst --- a/polly/docs/ReleaseNotes.rst +++ b/polly/docs/ReleaseNotes.rst @@ -10,3 +10,14 @@ These release notes are for the next release of Polly and describe the new features that have recently been committed to our development branch. + + +- Support for -polly-vectorizer=polly has been removed. Polly's internal + vectorizer is not well maintained and is known not to work in some cases, + such as region ScopStmts. Unlike LLVM's LoopVectorize pass, it also does + not have target-dependent cost heuristics, and we recommend using + LoopVectorize instead of -polly-vectorizer=polly. + + In the future we hope that Polly can collaborate better with LoopVectorize, + for example by Polly marking a loop as safe to vectorize with a specific + SIMD width, instead of replicating its functionality. diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -622,186 +622,6 @@ void invalidateScalarEvolution(Scop &S); }; -/// Generate a new vector basic block for a polyhedral statement. -/// -/// The only public function exposed is generate(). -class VectorBlockGenerator final : BlockGenerator { -public: - /// Generate a new vector basic block for a ScoPStmt. - /// - /// This code generation is similar to the normal, scalar code generation, - /// except that each instruction is code generated for several vector lanes - /// at a time. If possible instructions are issued as actual vector - /// instructions, but e.g. for address calculation instructions we currently - /// generate scalar instructions for each vector lane. - /// - /// @param BlockGen A block generator object used as parent. - /// @param Stmt The statement to code generate. - /// @param VLTS A mapping from loops virtual canonical induction - /// variable to their new values - /// (for values recalculated in the new ScoP, but not - /// within this basic block), one for each lane. - /// @param Schedule A map from the statement to a schedule where the - /// innermost dimension is the dimension of the innermost - /// loop containing the statement. - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt, - std::vector<LoopToScevMapT> &VLTS, - __isl_keep isl_map *Schedule, - __isl_keep isl_id_to_ast_expr *NewAccesses) { - VectorBlockGenerator Generator(BlockGen, VLTS, Schedule); - Generator.copyStmt(Stmt, NewAccesses); - } - -private: - // This is a vector of loop->scev maps. The first map is used for the first - // vector lane, ... - // Each map, contains information about Instructions in the old ScoP, which - // are recalculated in the new SCoP. When copying the basic block, we replace - // all references to the old instructions with their recalculated values. - // - // For example, when the code generator produces this AST: - // - // for (int c1 = 0; c1 <= 1023; c1 += 1) - // for (int c2 = 0; c2 <= 1023; c2 += VF) - // for (int lane = 0; lane <= VF; lane += 1) - // Stmt(c2 + lane + 3, c1); - // - // VLTS[lane] contains a map: - // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"), - // "inner loop in the old loop nest" -> SCEV("c1"). 
- std::vector<LoopToScevMapT> &VLTS; - - // A map from the statement to a schedule where the innermost dimension is the - // dimension of the innermost loop containing the statement. - isl_map *Schedule; - - VectorBlockGenerator(BlockGenerator &BlockGen, - std::vector<LoopToScevMapT> &VLTS, - __isl_keep isl_map *Schedule); - - int getVectorWidth(); - - Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, Loop *L); - - /// Load a vector from a set of adjacent scalars - /// - /// In case a set of scalars is known to be next to each other in memory, - /// create a vector load that loads those scalars - /// - /// %vector_ptr= bitcast double* %p to <4 x double>* - /// %vec_full = load <4 x double>* %vector_ptr - /// - /// @param Stmt The statement to code generate. - /// @param NegativeStride This is used to indicate a -1 stride. In such - /// a case we load the end of a base address and - /// shuffle the accesses in reverse order into the - /// vector. By default we would do only positive - /// strides. - /// - /// @param NewAccesses A map from memory access ids to new ast - /// expressions, which may contain new access - /// expressions for certain memory accesses. - Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load, - VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses, - bool NegativeStride); - - /// Load a vector initialized from a single scalar in memory - /// - /// In case all elements of a vector are initialized to the same - /// scalar value, this value is loaded and shuffled into all elements - /// of the vector. - /// - /// %splat_one = load <1 x double>* %p - /// %splat = shufflevector <1 x double> %splat_one, <1 x - /// double> %splat_one, <4 x i32> zeroinitializer - /// - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load, - ValueMapT &BBMap, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - /// Load a vector from scalars distributed in memory - /// - /// In case some scalars a distributed randomly in memory. Create a vector - /// by loading each scalar and by inserting one after the other into the - /// vector. - /// - /// %scalar_1= load double* %p_1 - /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 - /// %scalar 2 = load double* %p_2 - /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 - /// - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load, - VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst, - ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); - - void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst, - ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); - - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. 
- void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst, - ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps); - - bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); - - /// Generate vector loads for scalars. - /// - /// @param Stmt The scop statement for which to generate the loads. - /// @param VectorBlockMap A map that will be updated to relate the original - /// values with the newly generated vector loads. - void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap); - - /// Verify absence of scalar stores. - /// - /// @param Stmt The scop statement to check for scalar stores. - void verifyNoScalarStores(ScopStmt &Stmt); - - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses); - - /// @param NewAccesses A map from memory access ids to new ast expressions, - /// which may contain new access expressions for certain - /// memory accesses. - void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses); -}; - /// Generator for new versions of polyhedral region statements. class RegionGenerator final : BlockGenerator { public: diff --git a/polly/include/polly/CodeGen/CodeGeneration.h b/polly/include/polly/CodeGen/CodeGeneration.h --- a/polly/include/polly/CodeGen/CodeGeneration.h +++ b/polly/include/polly/CodeGen/CodeGeneration.h @@ -18,7 +18,6 @@ enum VectorizerChoice { VECTORIZER_NONE, VECTORIZER_STRIPMINE, - VECTORIZER_POLLY, }; extern VectorizerChoice PollyVectorizerChoice; diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -310,7 +310,6 @@ /// @returns False, iff a problem occurred and the load was not preloaded. bool preloadInvariantEquivClass(InvariantEquivClassTy &IAClass); - void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(isl::ast_node_for For, bool MarkParallel); /// Create LLVM-IR that executes a for node thread parallel. 
@@ -375,10 +374,6 @@ std::vector<Value *> &IVS, __isl_take isl_id *IteratorID); virtual void createIf(__isl_take isl_ast_node *If); - void createUserVector(__isl_take isl_ast_node *User, - std::vector<Value *> &IVS, - __isl_take isl_id *IteratorID, - __isl_take isl_union_map *Schedule); virtual void createUser(__isl_take isl_ast_node *User); virtual void createBlock(__isl_take isl_ast_node *Block); diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -1009,391 +1009,6 @@ invalidateScalarEvolution(S); } -VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, - std::vector<LoopToScevMapT> &VLTS, - isl_map *Schedule) - : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) { - assert(Schedule && "No statement domain provided"); -} - -Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, - Loop *L) { - if (Value *NewValue = VectorMap.lookup(Old)) - return NewValue; - - int Width = getVectorWidth(); - - Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width)); - - for (int Lane = 0; Lane < Width; Lane++) - Vector = Builder.CreateInsertElement( - Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L), - Builder.getInt32(Lane)); - - VectorMap[Old] = Vector; - - return Vector; -} - -Value *VectorBlockGenerator::generateStrideOneLoad( - ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) { - unsigned VectorWidth = getVectorWidth(); - Type *VectorType = FixedVectorType::get(Load->getType(), VectorWidth); - Type *VectorPtrType = - PointerType::get(VectorType, Load->getPointerAddressSpace()); - unsigned Offset = NegativeStride ? 
VectorWidth - 1 : 0; - - Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset], - VLTS[Offset], NewAccesses); - Value *VectorPtr = - Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); - LoadInst *VecLoad = Builder.CreateLoad(VectorType, VectorPtr, - Load->getName() + "_p_vec_full"); - if (!Aligned) - VecLoad->setAlignment(Align(8)); - - if (NegativeStride) { - SmallVector<Constant *, 16> Indices; - for (int i = VectorWidth - 1; i >= 0; i--) - Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i)); - Constant *SV = llvm::ConstantVector::get(Indices); - Value *RevVecLoad = Builder.CreateShuffleVector( - VecLoad, VecLoad, SV, Load->getName() + "_reverse"); - return RevVecLoad; - } - - return VecLoad; -} - -Value *VectorBlockGenerator::generateStrideZeroLoad( - ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap, - __isl_keep isl_id_to_ast_expr *NewAccesses) { - Type *VectorType = FixedVectorType::get(Load->getType(), 1); - Type *VectorPtrType = - PointerType::get(VectorType, Load->getPointerAddressSpace()); - Value *NewPointer = - generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses); - Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, - Load->getName() + "_p_vec_p"); - LoadInst *ScalarLoad = Builder.CreateLoad(VectorType, VectorPtr, - Load->getName() + "_p_splat_one"); - - if (!Aligned) - ScalarLoad->setAlignment(Align(8)); - - Constant *SplatVector = Constant::getNullValue( - FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth())); - - Value *VectorLoad = Builder.CreateShuffleVector( - ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); - return VectorLoad; -} - -Value *VectorBlockGenerator::generateUnknownStrideLoad( - ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps, - __isl_keep isl_id_to_ast_expr *NewAccesses) { - int VectorWidth = getVectorWidth(); - Type *ElemTy = Load->getType(); - auto *FVTy = FixedVectorType::get(ElemTy, VectorWidth); - - Value *Vector = UndefValue::get(FVTy); - - for (int i = 0; i < VectorWidth; i++) { - Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i], - VLTS[i], NewAccesses); - Value *ScalarLoad = - Builder.CreateLoad(ElemTy, NewPointer, Load->getName() + "_p_scalar_"); - Vector = Builder.CreateInsertElement( - Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); - } - - return Vector; -} - -void VectorBlockGenerator::generateLoad( - ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { - if (Value *PreloadLoad = GlobalMap.lookup(Load)) { - VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad, - Load->getName() + "_p"); - return; - } - - if (!VectorType::isValidElementType(Load->getType())) { - for (int i = 0; i < getVectorWidth(); i++) - ScalarMaps[i][Load] = - generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses); - return; - } - - const MemoryAccess &Access = Stmt.getArrayAccessFor(Load); - - // Make sure we have scalar values available to access the pointer to - // the data location. 
- extractScalarValues(Load, VectorMap, ScalarMaps); - - Value *NewLoad; - if (Access.isStrideZero(isl::manage_copy(Schedule))) - NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses); - else if (Access.isStrideOne(isl::manage_copy(Schedule))) - NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses); - else if (Access.isStrideX(isl::manage_copy(Schedule), -1)) - NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true); - else - NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses); - - VectorMap[Load] = NewLoad; -} - -void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { - int VectorWidth = getVectorWidth(); - Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap, - ScalarMaps, getLoopForStmt(Stmt)); - - assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction"); - - const CastInst *Cast = dyn_cast<CastInst>(Inst); - auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth); - VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType); -} - -void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { - Loop *L = getLoopForStmt(Stmt); - Value *OpZero = Inst->getOperand(0); - Value *OpOne = Inst->getOperand(1); - - Value *NewOpZero, *NewOpOne; - NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L); - NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L); - - Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne, - Inst->getName() + "p_vec"); - VectorMap[Inst] = NewInst; -} - -void VectorBlockGenerator::copyStore( - ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { - const MemoryAccess &Access = Stmt.getArrayAccessFor(Store); - - Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap, - ScalarMaps, getLoopForStmt(Stmt)); - - // Make sure we have scalar values available to access the pointer to - // the data location. 
- extractScalarValues(Store, VectorMap, ScalarMaps); - - if (Access.isStrideOne(isl::manage_copy(Schedule))) { - Type *VectorType = FixedVectorType::get(Store->getValueOperand()->getType(), - getVectorWidth()); - Type *VectorPtrType = - PointerType::get(VectorType, Store->getPointerAddressSpace()); - Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0], - VLTS[0], NewAccesses); - - Value *VectorPtr = - Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); - StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); - - if (!Aligned) - Store->setAlignment(Align(8)); - } else { - for (unsigned i = 0; i < ScalarMaps.size(); i++) { - Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); - Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i], - VLTS[i], NewAccesses); - Builder.CreateStore(Scalar, NewPointer); - } - } -} - -bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, - ValueMapT &VectorMap) { - for (Value *Operand : Inst->operands()) - if (VectorMap.count(Operand)) - return true; - return false; -} - -bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { - bool HasVectorOperand = false; - int VectorWidth = getVectorWidth(); - - for (Value *Operand : Inst->operands()) { - ValueMapT::iterator VecOp = VectorMap.find(Operand); - - if (VecOp == VectorMap.end()) - continue; - - HasVectorOperand = true; - Value *NewVector = VecOp->second; - - for (int i = 0; i < VectorWidth; ++i) { - ValueMapT &SM = ScalarMaps[i]; - - // If there is one scalar extracted, all scalar elements should have - // already been extracted by the code here. So no need to check for the - // existence of all of them. - if (SM.count(Operand)) - break; - - SM[Operand] = - Builder.CreateExtractElement(NewVector, Builder.getInt32(i)); - } - } - - return HasVectorOperand; -} - -void VectorBlockGenerator::copyInstScalarized( - ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { - bool HasVectorOperand; - int VectorWidth = getVectorWidth(); - - HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); - - for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) - BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane], - VLTS[VectorLane], NewAccesses); - - if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) - return; - - // Make the result available as vector value. - auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth); - Value *Vector = UndefValue::get(FVTy); - - for (int i = 0; i < VectorWidth; i++) - Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst], - Builder.getInt32(i)); - - VectorMap[Inst] = Vector; -} - -int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); } - -void VectorBlockGenerator::copyInstruction( - ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { - // Terminator instructions control the control flow. They are explicitly - // expressed in the clast and do not need to be copied. 
- if (Inst->isTerminator()) - return; - - if (canSyntheziseInStmt(Stmt, Inst)) - return; - - if (auto *Load = dyn_cast<LoadInst>(Inst)) { - generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses); - return; - } - - if (hasVectorOperands(Inst, VectorMap)) { - if (auto *Store = dyn_cast<StoreInst>(Inst)) { - // Identified as redundant by -polly-simplify. - if (!Stmt.getArrayAccessOrNULLFor(Store)) - return; - - copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses); - return; - } - - if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) { - copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps); - return; - } - - if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) { - copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps); - return; - } - - // Fallthrough: We generate scalar instructions, if we don't know how to - // generate vector code. - } - - copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses); -} - -void VectorBlockGenerator::generateScalarVectorLoads( - ScopStmt &Stmt, ValueMapT &VectorBlockMap) { - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isWrite()) - continue; - - auto *Address = getOrCreateAlloca(*MA); - Type *VectorType = FixedVectorType::get(MA->getElementType(), 1); - Type *VectorPtrType = PointerType::get( - VectorType, Address->getType()->getPointerAddressSpace()); - Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType, - Address->getName() + "_p_vec_p"); - auto *Val = Builder.CreateLoad(VectorType, VectorPtr, - Address->getName() + ".reload"); - Constant *SplatVector = Constant::getNullValue( - FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth())); - - Value *VectorVal = Builder.CreateShuffleVector( - Val, Val, SplatVector, Address->getName() + "_p_splat"); - VectorBlockMap[MA->getAccessValue()] = VectorVal; - } -} - -void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) { - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isRead()) - continue; - - llvm_unreachable("Scalar stores not expected in vector loop"); - } -} - -void VectorBlockGenerator::copyStmt( - ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { - assert(Stmt.isBlockStmt() && - "TODO: Only block statements can be copied by the vector block " - "generator"); - - BasicBlock *BB = Stmt.getBasicBlock(); - BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), - &*Builder.GetInsertPoint(), &DT, &LI); - CopyBB->setName("polly.stmt." + BB->getName()); - Builder.SetInsertPoint(&CopyBB->front()); - - // Create two maps that store the mapping from the original instructions of - // the old basic block to their copies in the new basic block. Those maps - // are basic block local. - // - // As vector code generation is supported there is one map for scalar values - // and one for vector values. - // - // In case we just do scalar code generation, the vectorMap is not used and - // the scalarMap has just one dimension, which contains the mapping. - // - // In case vector code generation is done, an instruction may either appear - // in the vector map once (as it is calculating >vectorwidth< values at a - // time. Or (if the values are calculated using scalar operations), it - // appears once in every dimension of the scalarMap. 
- VectorValueMapT ScalarBlockMap(getVectorWidth()); - ValueMapT VectorBlockMap; - - generateScalarVectorLoads(Stmt, VectorBlockMap); - - for (Instruction *Inst : Stmt.getInstructions()) - copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses); - - verifyNoScalarStores(Stmt); -} - BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB, BasicBlock *BBCopy) { diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -390,30 +390,6 @@ return It->second; } -void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, - std::vector<Value *> &IVS, - __isl_take isl_id *IteratorID, - __isl_take isl_union_map *Schedule) { - isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); - isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); - isl_id *Id = isl_ast_expr_get_id(StmtExpr); - isl_ast_expr_free(StmtExpr); - ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); - std::vector<LoopToScevMapT> VLTS(IVS.size()); - - isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release()); - Schedule = isl_union_map_intersect_domain(Schedule, Domain); - isl_map *S = isl_map_from_union_map(Schedule); - - auto *NewAccesses = createNewAccesses(Stmt, User); - createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); - VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); - isl_id_to_ast_expr_free(NewAccesses); - isl_map_free(S); - isl_id_free(Id); - isl_ast_node_free(User); -} - void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { auto *Id = isl_ast_node_mark_get_id(Node); auto Child = isl_ast_node_mark_get_node(Node); @@ -422,13 +398,7 @@ // it will be optimized away and we should skip it. if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && isl_ast_node_get_type(Child) == isl_ast_node_for) { - bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; - int VectorWidth = - getNumberOfIterations(isl::manage_copy(Child).as<isl::ast_node_for>()); - if (Vector && 1 < VectorWidth && VectorWidth <= 16) - createForVector(Child, VectorWidth); - else - createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true); + createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true); isl_id_free(Id); return; } @@ -456,67 +426,6 @@ isl_id_free(Id); } -void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, - int VectorWidth) { - isl_ast_node *Body = isl_ast_node_for_get_body(For); - isl_ast_expr *Init = isl_ast_node_for_get_init(For); - isl_ast_expr *Inc = isl_ast_node_for_get_inc(For); - isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For); - isl_id *IteratorID = isl_ast_expr_get_id(Iterator); - - Value *ValueLB = ExprBuilder.create(Init); - Value *ValueInc = ExprBuilder.create(Inc); - - Type *MaxType = ExprBuilder.getType(Iterator); - MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); - MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); - - if (MaxType != ValueLB->getType()) - ValueLB = Builder.CreateSExt(ValueLB, MaxType); - if (MaxType != ValueInc->getType()) - ValueInc = Builder.CreateSExt(ValueInc, MaxType); - - std::vector<Value *> IVS(VectorWidth); - IVS[0] = ValueLB; - - for (int i = 1; i < VectorWidth; i++) - IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); - - isl::union_map Schedule = getScheduleForAstNode(isl::manage_copy(For)); - assert(!Schedule.is_null() && - "For statement annotation does not contain its schedule"); - - IDToValue[IteratorID] = ValueLB; - - switch (isl_ast_node_get_type(Body)) { - case isl_ast_node_user: - 
createUserVector(Body, IVS, isl_id_copy(IteratorID), Schedule.copy()); - break; - case isl_ast_node_block: { - isl_ast_node_list *List = isl_ast_node_block_get_children(Body); - - for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) - createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS, - isl_id_copy(IteratorID), Schedule.copy()); - - isl_ast_node_free(Body); - isl_ast_node_list_free(List); - break; - } - default: - isl_ast_node_dump(Body); - llvm_unreachable("Unhandled isl_ast_node in vectorizer"); - } - - IDToValue.erase(IDToValue.find(IteratorID)); - isl_id_free(IteratorID); - - isl_ast_node_free(For); - isl_ast_expr_free(Iterator); - - VectorLoops++; -} - /// Restore the initial ordering of dimensions of the band node /// /// In case the band node represents all the dimensions of the iteration @@ -761,46 +670,7 @@ ParallelLoops++; } -/// Return whether any of @p Node's statements contain partial accesses. -/// -/// Partial accesses are not supported by Polly's vector code generator. -static bool hasPartialAccesses(__isl_take isl_ast_node *Node) { - return isl_ast_node_foreach_descendant_top_down( - Node, - [](isl_ast_node *Node, void *User) -> isl_bool { - if (isl_ast_node_get_type(Node) != isl_ast_node_user) - return isl_bool_true; - - isl::ast_expr Expr = - isl::manage(isl_ast_node_user_get_expr(Node)); - isl::ast_expr StmtExpr = Expr.get_op_arg(0); - isl::id Id = StmtExpr.get_id(); - - ScopStmt *Stmt = - static_cast<ScopStmt *>(isl_id_get_user(Id.get())); - isl::set StmtDom = Stmt->getDomain(); - for (auto *MA : *Stmt) { - if (MA->isLatestPartialAccess()) - return isl_bool_error; - } - return isl_bool_true; - }, - nullptr) == isl_stat_error; -} - void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { - bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY; - - if (Vector && IslAstInfo::isInnermostParallel(isl::manage_copy(For)) && - !IslAstInfo::isReductionParallel(isl::manage_copy(For))) { - int VectorWidth = - getNumberOfIterations(isl::manage_copy(For).as<isl::ast_node_for>()); - if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) { - createForVector(For, VectorWidth); - return; - } - } - if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For))) { createForParallel(For); return; diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp --- a/polly/lib/Support/RegisterPasses.cpp +++ b/polly/lib/Support/RegisterPasses.cpp @@ -103,7 +103,6 @@ "polly-vectorizer", cl::desc("Select the vectorization strategy"), cl::values( clEnumValN(VECTORIZER_NONE, "none", "No Vectorization"), - clEnumValN(VECTORIZER_POLLY, "polly", "Polly internal vectorizer"), clEnumValN( VECTORIZER_STRIPMINE, "stripmine", "Strip-mine outer loops for the loop-vectorizer to trigger")), diff --git a/polly/test/CodeGen/20130211-getNumberOfIterations.ll b/polly/test/CodeGen/20130211-getNumberOfIterations.ll deleted file mode 100644 --- a/polly/test/CodeGen/20130211-getNumberOfIterations.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly < %s - -; This test case checks that the polly vectorizer does not crash when -; calculating the number of iterations. 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -@b = external global [2048 x i64], align 16 - -define void @foo(i64 %n) { -entry: - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %indvar = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %cmp = icmp slt i64 %indvar, %n - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds [2048 x i64], ptr @b, i64 0, i64 %indvar - store i64 1, ptr %arrayidx - %inc = add nsw i64 %indvar, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - diff --git a/polly/test/CodeGen/MemAccess/simple_analyze.ll b/polly/test/CodeGen/MemAccess/simple_analyze.ll --- a/polly/test/CodeGen/MemAccess/simple_analyze.ll +++ b/polly/test/CodeGen/MemAccess/simple_analyze.ll @@ -1,5 +1,4 @@ ;RUN: opt %loadPolly -polly-print-import-jscop -polly-import-jscop-postfix=transformed -disable-output < %s | FileCheck %s -;RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-codegen -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=JSCOPVEC target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" @A = common global [100 x i32] zeroinitializer, align 4 @@ -46,31 +45,3 @@ } ; CHECK-DAG: New access function '{ Stmt_for_body7[i0] -> MemRef_B[0] }' detected in JSCOP file ; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_A[0] }' detected in JSCOP file - -; Verify that the new access function (see above) is actually used during vector code generation. - -; JSCOPVEC: store i32 0, ptr @B -; JSCOPVEC: store i32 1, ptr @B -; JSCOPVEC: store i32 2, ptr @B -; JSCOPVEC: store i32 3, ptr @B -; JSCOPVEC: store i32 4, ptr @B -; JSCOPVEC: store i32 5, ptr @B -; JSCOPVEC: store i32 6, ptr @B -; JSCOPVEC: store i32 7, ptr @B -; JSCOPVEC: store i32 8, ptr @B -; JSCOPVEC: store i32 9, ptr @B -; JSCOPVEC: store i32 10, ptr @B -; JSCOPVEC: store i32 11, ptr @B - -; JSCOPVEC: store i32 0, ptr @A -; JSCOPVEC: store i32 1, ptr @A -; JSCOPVEC: store i32 2, ptr @A -; JSCOPVEC: store i32 3, ptr @A -; JSCOPVEC: store i32 4, ptr @A -; JSCOPVEC: store i32 5, ptr @A -; JSCOPVEC: store i32 6, ptr @A -; JSCOPVEC: store i32 7, ptr @A -; JSCOPVEC: store i32 8, ptr @A -; JSCOPVEC: store i32 9, ptr @A -; JSCOPVEC: store i32 10, ptr @A -; JSCOPVEC: store i32 11, ptr @A diff --git a/polly/test/CodeGen/MemAccess/simple_stride_test.ll b/polly/test/CodeGen/MemAccess/simple_stride_test.ll deleted file mode 100644 --- a/polly/test/CodeGen/MemAccess/simple_stride_test.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-import-jscop -polly-codegen -polly-vectorizer=polly -S < %s | FileCheck %s -; -; Check that we use the correct __new__ strides: -; stride zero for B -; stride one for A -; -; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0 -; CHECK: %[[BC:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.B to <1 x i32>* -; CHECK: %[[LD:[._a-zA-Z0-9]*]] = load <1 x i32>, <1 x i32>* %[[BC]], align 8 -; CHECK: %[[SV:[._a-zA-Z0-9]*]] = shufflevector <1 x i32> %[[LD]], <1 x i32> %[[LD]], <16 x i32> zeroinitializer -; -; CHECK: %polly.access.A = getelementptr i32, i32* %A, i64 0 -; CHECK: %[[VP:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.A to <16 x i32>* -; CHECK: store <16 x i32> %[[SV]], <16 x i32>* %[[VP]], align 8 -; -; void 
simple_stride(int *restrict A, int *restrict B) { -; for (int i = 0; i < 16; i++) -; A[i * 2] = B[i * 2]; -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @simple_stride(i32* noalias %A, i32* noalias %B) { -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvars.iv, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %tmp = shl nsw i64 %indvars.iv, 1 - %arrayidx = getelementptr inbounds i32, i32* %B, i64 %tmp - %tmp4 = load i32, i32* %arrayidx, align 4 - %tmp5 = shl nsw i64 %indvars.iv, 1 - %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %tmp5 - store i32 %tmp4, i32* %arrayidx3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} diff --git a/polly/test/CodeGen/OpenMP/scev-rewriting.ll b/polly/test/CodeGen/OpenMP/scev-rewriting.ll --- a/polly/test/CodeGen/OpenMP/scev-rewriting.ll +++ b/polly/test/CodeGen/OpenMP/scev-rewriting.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly < %s -polly-vectorizer=polly -polly-parallel -polly-parallel-force -polly-process-unprofitable -polly-codegen -S | FileCheck %s +; RUN: opt %loadPolly < %s -polly-vectorizer=stripmine -polly-parallel -polly-parallel-force -polly-process-unprofitable -polly-codegen -S | FileCheck %s ; CHECK: define internal void @DoStringSort_polly_subfn target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnueabi" diff --git a/polly/test/CodeGen/getNumberOfIterations.ll b/polly/test/CodeGen/getNumberOfIterations.ll deleted file mode 100644 --- a/polly/test/CodeGen/getNumberOfIterations.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -polly-vectorizer=polly -polly-codegen \ -; RUN: < %s -S | FileCheck %s - -; #pragma known-parallel -; for (int c0 = 0; c0 <= min(15, N - 1); c0 += 1) -; Stmt_if_then(c0); - -; CHECK: polly.stmt.if.then: ; preds = %polly.loop_header -; CHECK: %p_conv = sitofp i64 %polly.indvar to float -; CHECK: %scevgep = getelementptr float, float* %A, i64 %polly.indvar -; CHECK: %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !3, !llvm.access.group !4 -; CHECK: %p_add = fadd float %p_conv, %_p_scalar_ -; CHECK: store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !3, !llvm.access.group !4 - -define void @foo(float* %A, i64 %N) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.inc - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] - %cmp1 = icmp slt i64 %i.02, %N - br i1 %cmp1, label %if.then, label %for.inc - -if.then: ; preds = %for.body - %conv = sitofp i64 %i.02 to float - %arrayidx = getelementptr inbounds float, float* %A, i64 %i.02 - %0 = load float, float* %arrayidx, align 4 - %add = fadd float %conv, %0 - store float %add, float* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %for.body, %if.then - %inc = add nuw nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.inc - ret void -} diff --git a/polly/test/CodeGen/if-conditions-in-vector-code.ll b/polly/test/CodeGen/if-conditions-in-vector-code.ll deleted file mode 100644 --- a/polly/test/CodeGen/if-conditions-in-vector-code.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: opt %loadPolly -polly-vectorizer=polly 
-polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST -; RUN: opt %loadPolly -polly-vectorizer=polly -polly-codegen -S < %s | FileCheck %s -; -; void foo(float *A) { -; for (long i = 0; i < 16; i++) { -; if (i % 2) -; A[i] += 2; -; if (i % 3) -; A[i] += 3; -; } -; } -; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; AST: #pragma simd -; AST: #pragma known-parallel -; AST: for (int c0 = 0; c0 <= 15; c0 += 1) { -; AST: if ((c0 + 1) % 2 == 0) -; AST: Stmt_bb4(c0); -; AST: if (c0 % 3 >= 1) -; AST: Stmt_bb11(c0); -; AST: } - -; CHECK: polly.split_new_and_old - -define void @foo(ptr %A) { -bb: - br label %bb1 - -bb1: ; preds = %bb16, %bb - %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ] - %exitcond = icmp ne i64 %i.0, 16 - br i1 %exitcond, label %bb2, label %bb18 - -bb2: ; preds = %bb1 - %tmp = srem i64 %i.0, 2 - %tmp3 = icmp eq i64 %tmp, 0 - br i1 %tmp3, label %bb8, label %bb4 - -bb4: ; preds = %bb2 - %tmp5 = getelementptr inbounds float, ptr %A, i64 %i.0 - %tmp6 = load float, ptr %tmp5, align 4 - %tmp7 = fadd float %tmp6, 2.000000e+00 - store float %tmp7, ptr %tmp5, align 4 - br label %bb8 - -bb8: ; preds = %bb2, %bb4 - %tmp9 = srem i64 %i.0, 3 - %tmp10 = icmp eq i64 %tmp9, 0 - br i1 %tmp10, label %bb15, label %bb11 - -bb11: ; preds = %bb8 - %tmp12 = getelementptr inbounds float, ptr %A, i64 %i.0 - %tmp13 = load float, ptr %tmp12, align 4 - %tmp14 = fadd float %tmp13, 3.000000e+00 - store float %tmp14, ptr %tmp12, align 4 - br label %bb15 - -bb15: ; preds = %bb8, %bb11 - br label %bb16 - -bb16: ; preds = %bb15 - %tmp17 = add nsw i64 %i.0, 1 - br label %bb1 - -bb18: ; preds = %bb1 - ret void -} diff --git a/polly/test/CodeGen/invariant_load_hoist_alignment.ll b/polly/test/CodeGen/invariant_load_hoist_alignment.ll deleted file mode 100644 --- a/polly/test/CodeGen/invariant_load_hoist_alignment.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \ -; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [1024 x i32] zeroinitializer, align 16 -@B = common global [1024 x i32] zeroinitializer, align 16 - -declare i32 @foo(i32) readnone - -define void @force_alignment() nounwind { -;CHECK: @force_alignment -entry: - br label %body - -body: - %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ] - %scevgep = getelementptr [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvar -; CHECK: [[T2:%.load]] = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i32 0, i32 0), align 4 -; CHECK: %value_p.splatinsert = insertelement <4 x i32> poison, i32 [[T2]], i64 0 - %value = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i64 0, i64 0), align 4 - %result = tail call i32 @foo(i32 %value) nounwind - store i32 %result, i32* %scevgep, align 4 - %indvar_next = add i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar_next, 4 - br i1 %exitcond, label %return, label %body - -return: - ret void -} - diff --git a/polly/test/CodeGen/partial_write_mapped_vector.ll b/polly/test/CodeGen/partial_write_mapped_vector.ll deleted file mode 100644 --- a/polly/test/CodeGen/partial_write_mapped_vector.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: opt %loadPolly -basic-aa -polly-stmt-granularity=bb -polly-import-jscop -polly-import-jscop-postfix=transformed 
-polly-vectorizer=polly -polly-opt-isl -polly-ast -polly-codegen -S < %s | FileCheck %s -; -; Polly's vectorizer does not support partial accesses. -; -; for (int j = 0; j < 4; j += 1) { -;body: -; val = 21.0 + 21.0; -; if (j > 1) -;user: -; A[0] = val; -; } - -define void @partial_write_mapped_vector(ptr noalias nonnull %A) { -entry: - br label %for - -for: - %j = phi i32 [0, %entry], [%j.inc, %inc] - %j.cmp = icmp slt i32 %j, 4 - br i1 %j.cmp, label %body, label %exit - - body: - %val = fadd double 21.0, 21.0 - %if.cond = icmp sgt i32 %j, 1 - br i1 %if.cond, label %user, label %inc - - user: - %elt= getelementptr inbounds double, ptr %A, i32 %j - store double %val, ptr %elt - br label %inc - -inc: - %j.inc = add nuw nsw i32 %j, 1 - br label %for - -exit: - br label %return - -return: - ret void -} - - -; CHECK-LABEL: polly.stmt.body: -; CHECK-NEXT: %p_val = fadd double 2.100000e+01, 2.100000e+01 -; CHECK-NEXT: %0 = trunc i64 %polly.indvar to i32 -; CHECK-NEXT: %p_if.cond = icmp sgt i32 %0, 1 -; CHECK-NEXT: %1 = icmp sge i64 %polly.indvar, 2 -; CHECK-NEXT: %polly.Stmt_body_Write0.cond = icmp ne i1 %1, false -; CHECK-NEXT: br i1 %polly.Stmt_body_Write0.cond, label %polly.stmt.body.Stmt_body_Write0.partial, label %polly.stmt.body.cont - -; CHECK-LABEL: polly.stmt.body.Stmt_body_Write0.partial: -; CHECK-NEXT: %polly.access.A = getelementptr double, ptr %A, i64 1 -; CHECK-NEXT: store double %p_val, ptr %polly.access.A -; CHECK-NEXT: br label %polly.stmt.body.cont - -; CHECK-LABEL: polly.stmt.body.cont: diff --git a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop b/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop deleted file mode 100644 --- a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop +++ /dev/null @@ -1,39 +0,0 @@ -{ - "arrays" : [ - { - "name" : "MemRef_A", - "sizes" : [ "*" ], - "type" : "double" - } - ], - "context" : "{ : }", - "name" : "%for---%return", - "statements" : [ - { - "accesses" : [ - { - "kind" : "write", - "relation" : "{ Stmt_body[i0] -> MemRef_val[] }" - } - ], - "domain" : "{ Stmt_body[i0] : 0 <= i0 <= 3 }", - "name" : "Stmt_body", - "schedule" : "{ Stmt_body[i0] -> [i0, 0] }" - }, - { - "accesses" : [ - { - "kind" : "write", - "relation" : "{ Stmt_user[i0] -> MemRef_A[i0] }" - }, - { - "kind" : "read", - "relation" : "{ Stmt_user[i0] -> MemRef_val[] }" - } - ], - "domain" : "{ Stmt_user[i0] : 2 <= i0 <= 3 }", - "name" : "Stmt_user", - "schedule" : "{ Stmt_user[i0] -> [i0, 1] }" - } - ] -} diff --git a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed b/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed deleted file mode 100644 --- a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed +++ /dev/null @@ -1,39 +0,0 @@ -{ - "arrays" : [ - { - "name" : "MemRef_A", - "sizes" : [ "*" ], - "type" : "double" - } - ], - "context" : "{ : }", - "name" : "%for---%return", - "statements" : [ - { - "accesses" : [ - { - "kind" : "write", - "relation" : "{ Stmt_body[j] -> MemRef_A[1] : j > 1 }" - } - ], - "domain" : "{ Stmt_body[i0] : 0 <= i0 <= 3 }", - "name" : "Stmt_body", - "schedule" : "{ Stmt_body[i0] -> [i0, 0] }" - }, - { - "accesses" : [ - { - "kind" : "write", - "relation" : "{ Stmt_user[i0] -> MemRef_A[i0] }" - }, - { - "kind" : "read", - "relation" : "{ Stmt_user[j] -> MemRef_A[1] }" - } - ], - "domain" : "{ Stmt_user[i0] : 2 <= i0 <= 3 }", - "name" : "Stmt_user", - "schedule" : "{ Stmt_user[i0] -> [i0, 1] }" - } - ] 
-} diff --git a/polly/test/CodeGen/simple_vec_assign_scalar.ll b/polly/test/CodeGen/simple_vec_assign_scalar.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_assign_scalar.ll +++ /dev/null @@ -1,64 +0,0 @@ -; RUN: opt %loadPolly -basic-aa -polly-codegen \ -; RUN: -polly-vectorizer=polly -dce -S < %s | FileCheck %s - -; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=stripmine -dce -S < %s | FileCheck %s --check-prefix=STRIPMINE - -;#define N 1024 -;float A[N]; -;float B[N]; -; -;void simple_vec_const(void) { -; int i; -; -; for (i = 0; i < 4; i++) -; B[i] = A[i] + 1; -;} -;int main() -;{ -; simple_vec_const(); -; return A[42]; -;} - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x float] zeroinitializer, align 16 - -define void @simple_vec_const() nounwind { -bb: - br label %bb2 - -bb2: ; preds = %bb5, %bb - %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ] - %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar - %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar - %exitcond = icmp ne i64 %indvar, 4 - br i1 %exitcond, label %bb3, label %bb6 - -bb3: ; preds = %bb2 - %tmp = load float, ptr %scevgep1, align 4 - %tmp4 = fadd float %tmp, 1.000000e+00 - store float %tmp4, ptr %scevgep, align 4 - br label %bb5 - -bb5: ; preds = %bb3 - %indvar.next = add i64 %indvar, 1 - br label %bb2 - -bb6: ; preds = %bb2 - ret void -} - -define i32 @main() nounwind { -bb: - call void @simple_vec_const() - %tmp = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8 - %tmp1 = fptosi float %tmp to i32 - ret i32 %tmp1 -} - -; STRIPMINE-NOT: <4 x float> - -; CHECK: %tmp_p_vec_full = load <4 x float>, ptr @A, align 8, !alias.scope !0, !noalias !3 -; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, -; CHECK: store <4 x float> %tmp4p_vec, ptr @B diff --git a/polly/test/CodeGen/simple_vec_assign_scalar_2.ll b/polly/test/CodeGen/simple_vec_assign_scalar_2.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_assign_scalar_2.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce -S < %s | FileCheck %s - -;#define N 1024 -;float A[N]; -;float B[N]; -; -;void simple_vec_const(void) { -; int i; -; -; for (i = 0; i < 4; i++) -; B[i] = A[i] + i; -;} -;int main() -;{ -; simple_vec_const(); -; return A[42]; -;} - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x float] zeroinitializer, align 16 - -define void @simple_vec_const() nounwind { -bb: - br label %bb2 - -bb2: ; preds = %bb6, %bb - %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ] - %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar - %i.0 = trunc i64 %indvar to i32 - %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar - %exitcond = icmp ne i64 %indvar, 4 - br i1 %exitcond, label %bb3, label %bb7 - -bb3: ; preds = %bb2 - %tmp = load float, ptr %scevgep1, align 4 - %tmp4 = sitofp i32 %i.0 to float - %tmp5 = fadd float %tmp, %tmp4 - store float %tmp5, ptr %scevgep, align 4 - br label %bb6 - -bb6: ; preds = %bb3 - %indvar.next = add i64 %indvar, 1 - br label %bb2 - 
-bb7: ; preds = %bb2 - ret void -} - -define i32 @main() nounwind { -bb: - call void @simple_vec_const() - %tmp = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8 - %tmp1 = fptosi float %tmp to i32 - ret i32 %tmp1 -} - - -; CHECK: insertelement <4 x float> undef, float %{{[^,]+}}, i32 0 -; CHECK: insertelement <4 x float> %0, float %{{[^,]+}}, i32 1 -; CHECK: insertelement <4 x float> %1, float %{{[^,]+}}, i32 2 -; CHECK: insertelement <4 x float> %2, float %{{[^,]+}}, i32 3 -; CHECK: fadd <4 x float> %tmp_p_vec_full, %3 - diff --git a/polly/test/CodeGen/simple_vec_call.ll b/polly/test/CodeGen/simple_vec_call.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_call.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \ -; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x float] zeroinitializer, align 16 - -declare float @foo(float) readnone - -define void @simple_vec_call() nounwind { -entry: - br label %body - -body: - %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ] - %scevgep = getelementptr [1024 x float], [1024 x float]* @B, i64 0, i64 %indvar - %value = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16 - %result = tail call float @foo(float %value) nounwind - store float %result, float* %scevgep, align 4 - %indvar_next = add i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar_next, 4 - br i1 %exitcond, label %return, label %body - -return: - ret void -} - -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] -; CHECK: [[RES5:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[RES1]], i32 0 -; CHECK: [[RES6:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES5]], float [[RES2]], i32 1 -; CHECK: [[RES7:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES6]], float [[RES3]], i32 2 -; CHECK: [[RES8:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES7]], float [[RES4]], i32 3 -; CHECK: store <4 x float> [[RES8]] -; CHECK: attributes [[NUW]] = { nounwind } diff --git a/polly/test/CodeGen/simple_vec_call_2.ll b/polly/test/CodeGen/simple_vec_call_2.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_call_2.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce \ -; RUN: -polly-invariant-load-hoisting=true -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x float**] zeroinitializer, align 16 - -declare float** @foo(float) readnone - -define void @simple_vec_call() nounwind { -entry: - br label %body - -body: - %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ] - %scevgep = getelementptr [1024 x float**], [1024 x float**]* @B, i64 0, i64 %indvar - %value = load float, 
float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16 - %result = tail call float** @foo(float %value) nounwind - store float** %result, float*** %scevgep, align 4 - %indvar_next = add i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar_next, 4 - br i1 %exitcond, label %return, label %body - -return: - ret void -} - -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] -; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0 -; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1 -; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2 -; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3 -; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align -; CHECK: attributes [[NUW]] = { nounwind } diff --git a/polly/test/CodeGen/simple_vec_cast.ll b/polly/test/CodeGen/simple_vec_cast.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_cast.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly \ -; RUN: -polly-invariant-load-hoisting=true -dce -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x double] zeroinitializer, align 16 - -define void @simple_vec_const() nounwind { -bb: - br label %bb1 - -bb1: ; preds = %bb3, %bb - %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ] - %scevgep = getelementptr [1024 x double], [1024 x double]* @B, i64 0, i64 %indvar - %exitcond = icmp ne i64 %indvar, 4 - br i1 %exitcond, label %bb2, label %bb4 - -bb2: ; preds = %bb1 - %tmp = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16 - %tmp2 = fpext float %tmp to double - store double %tmp2, double* %scevgep, align 4 - br label %bb3 - -bb3: ; preds = %bb2 - %indvar.next = add i64 %indvar, 1 - br label %bb1 - -bb4: ; preds = %bb1 - ret void -} - -; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) - -; CHECK: polly.stmt.bb2: ; preds = %polly.start -; CHECK: %tmp_p.splatinsert = insertelement <4 x float> poison, float %.load, i64 0 -; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer -; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double> -; CHECK: store <4 x double> %0, <4 x double>* diff --git a/polly/test/CodeGen/simple_vec_const.ll b/polly/test/CodeGen/simple_vec_const.ll deleted file mode 100644 --- a/polly/test/CodeGen/simple_vec_const.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \ -; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s - -;#define N 1024 -;float A[N]; -;float B[N]; -; -;void simple_vec_const(void) { -; int i; -; -; for (i = 0; i < 4; i++) -; B[i] = A[0]; -;} -;int main() -;{ -; simple_vec_const(); -; return A[42]; -;} - -target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 16 -@B = common global [1024 x float] zeroinitializer, align 16 - -define void @simple_vec_const() nounwind { -;