Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -209,13 +209,17 @@ /// The pass is needed to update other analysis. /// @param LI The loop info for the current function /// @param SE The scalar evolution info for the current function + /// @param Build The AST build with the new schedule. + /// @param ExprBuilder An expression builder to generate new access functions. static void generate(PollyIRBuilder &B, ScopStmt &Stmt, VectorValueMapT &GlobalMaps, std::vector &VLTS, __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, - ScalarEvolution &SE) { + ScalarEvolution &SE, + __isl_keep isl_ast_build *Build = nullptr, + IslExprBuilder *ExprBuilder = nullptr) { VectorBlockGenerator Generator(B, GlobalMaps, VLTS, Stmt, Schedule, P, LI, - SE); + SE, Build, ExprBuilder); Generator.copyBB(); } @@ -252,7 +256,9 @@ VectorBlockGenerator(PollyIRBuilder &B, VectorValueMapT &GlobalMaps, std::vector &VLTS, ScopStmt &Stmt, __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, - ScalarEvolution &SE); + ScalarEvolution &SE, + __isl_keep isl_ast_build *Build = nullptr, + IslExprBuilder *ExprBuilder = nullptr); int getVectorWidth(); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -289,8 +289,9 @@ VectorBlockGenerator::VectorBlockGenerator( PollyIRBuilder &B, VectorValueMapT &GlobalMaps, std::vector &VLTS, ScopStmt &Stmt, - __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE) - : BlockGenerator(B, Stmt, P, LI, SE, nullptr, nullptr), + __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE, + __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder) + : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder), GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) { assert(GlobalMaps.size() > 1 && "Only one vector lane found"); assert(Schedule && "No statement domain provided"); @@ -338,8 +339,8 @@ unsigned Offset = NegativeStride ? VectorWidth - 1 : 0; Value *NewPointer = nullptr; - NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset], - VLTS[Offset], getLoopForInst(Load)); + NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[Offset], + GlobalMaps[Offset], VLTS[Offset]); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); LoadInst *VecLoad = @@ -365,7 +366,7 @@ const Value *Pointer = Load->getPointerOperand(); Type *VectorPtrType = getVectorPtrTy(Pointer, 1); Value *NewPointer = - getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load)); + generateLocationAccessed(Load, Pointer, BBMap, GlobalMaps[0], VLTS[0]); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, Load->getName() + "_p_vec_p"); LoadInst *ScalarLoad = @@ -393,8 +394,8 @@ Value *Vector = UndefValue::get(VectorType); for (int i = 0; i < VectorWidth; i++) { - Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], - VLTS[i], getLoopForInst(Load)); + Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i], + GlobalMaps[i], VLTS[i]); Value *ScalarLoad = Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); Vector = Builder.CreateInsertElement( @@ -481,8 +482,8 @@ if (Access.isStrideOne(isl_map_copy(Schedule))) { Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); - Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0], - VLTS[0], getLoopForInst(Store)); + Value *NewPointer = generateLocationAccessed(Store, Pointer, ScalarMaps[0], + GlobalMaps[0], VLTS[0]); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); @@ -493,8 +494,8 @@ } else { for (unsigned i = 0; i < ScalarMaps.size(); i++) { Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); - Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i], - VLTS[i], getLoopForInst(Store)); + Value *NewPointer = generateLocationAccessed( + Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Builder.CreateStore(Scalar, NewPointer); } } @@ -549,8 +550,8 @@ HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) - copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], - VLTS[VectorLane]); + BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane], + GlobalMaps[VectorLane], VLTS[VectorLane]); if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) return; Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -248,7 +248,8 @@ isl_map *S = isl_map_from_union_map(Schedule); createSubstitutionsVector(Expr, Stmt, VectorMap, VLTS, IVS, IteratorID); - VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE); + VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE, + IslAstInfo::getBuild(User), &ExprBuilder); isl_map_free(S); isl_id_free(Id); Index: test/Isl/CodeGen/MemAccess/simple_analyze.ll =================================================================== --- test/Isl/CodeGen/MemAccess/simple_analyze.ll +++ test/Isl/CodeGen/MemAccess/simple_analyze.ll @@ -1,6 +1,6 @@ ;RUN: opt %loadPolly -polly-import-jscop -analyze -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed < %s | FileCheck %s +;RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=JSCOPVEC target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-pc-linux-gnu" @A = common global [100 x i32] zeroinitializer, align 4 @B = common global [100 x i32] zeroinitializer, align 4 @@ -45,3 +45,31 @@ } ; CHECK-DAG: New access function '{ Stmt_for_body7[i0] -> MemRef_B[0] }'detected in JSCOP file ; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_A[0] }'detected in JSCOP file + +; Verify that the new access function (see above) is actually used during vector code generation. + +; JSCOPVEC: store i32 0, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 1, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 2, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 3, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 4, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 5, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 6, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 7, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 8, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 9, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 10, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) +; JSCOPVEC: store i32 11, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0) + +; JSCOPVEC: store i32 0, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 1, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 2, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 3, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 4, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 5, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 6, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 7, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 8, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 9, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 10, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0) +; JSCOPVEC: store i32 11, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)