Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -17,7 +17,8 @@ #include "isl/ast.h" namespace llvm { -class SCEVExpander; +class DataLayout; +class ScalarEvolution; } namespace polly { @@ -91,12 +92,10 @@ /// variables (identified by an isl_id). The IDTOValue map /// specifies the LLVM-IR Values that correspond to these /// parameters and variables. - /// @param Expander A SCEVExpander to create the indices for multi - /// dimensional accesses. - IslExprBuilder(PollyIRBuilder &Builder, IDToValueTy &IDToValue, - llvm::SCEVExpander &Expander, llvm::DominatorTree &DT, - llvm::LoopInfo &LI) - : Builder(Builder), IDToValue(IDToValue), Expander(Expander), DT(DT), + IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, + const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, + llvm::DominatorTree &DT, llvm::LoopInfo &LI) + : S(S), Builder(Builder), IDToValue(IDToValue), DL(DL), SE(SE), DT(DT), LI(LI) {} /// @brief Create LLVM-IR for an isl_ast_expr[ession]. @@ -124,12 +123,13 @@ llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); private: + Scop &S; + PollyIRBuilder &Builder; IDToValueTy &IDToValue; - /// @brief A SCEVExpander to translate dimension sizes to llvm values. - llvm::SCEVExpander &Expander; - + const llvm::DataLayout &DL; + llvm::ScalarEvolution &SE; llvm::DominatorTree &DT; llvm::LoopInfo &LI; Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -16,7 +16,6 @@ #include "polly/CodeGen/BlockGenerators.h" #include "polly/CodeGen/IslExprBuilder.h" #include "polly/CodeGen/LoopGenerators.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "isl/ctx.h" #include "isl/union_map.h" @@ -30,8 +29,8 @@ IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, Scop &S) - : S(S), Builder(Builder), Annotator(Annotator), Rewriter(SE, DL, "polly"), - ExprBuilder(Builder, IDToValue, Rewriter, DT, LI), + : S(S), Builder(Builder), Annotator(Annotator), + ExprBuilder(S, Builder, IDToValue, DL, SE, DT, LI), BlockGen(Builder, LI, SE, DT, ScalarMap, PHIOpMap, EscapeMap, &ExprBuilder), RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT) {} @@ -53,9 +52,6 @@ PollyIRBuilder &Builder; ScopAnnotator &Annotator; - /// @brief A SCEVExpander to create llvm values from SCEVs. - SCEVExpander Rewriter; - IslExprBuilder ExprBuilder; /// @brief Maps used by the block and region generator to demote scalars. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -27,6 +27,7 @@ class Pass; class BasicBlock; class StringRef; +class DataLayout; class DominatorTree; class RegionInfo; class ScalarEvolution; @@ -77,5 +78,26 @@ /// @param P The pass that currently running. /// void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock, llvm::Pass *P); + +/// @brief Wrapper for SCEVExpander extended to all Polly features. +/// +/// This wrapper will internally call the SCEVExpander but also make sure that +/// all additional features not represented in SCEV (e.g., SDiv/SRem are not +/// black boxes but can be part of the function) will be expanded correctly. +/// +/// The parameters are the same as for the creation of a SCEVExpander as well +/// as the call to SCEVExpander::expandCodeFor: +/// +/// @param S The current Scop. +/// @param SE The Scalar Evolution pass. +/// @param DL The module data layout. +/// @param Name The suffix added to the new instruction names. +/// @param E The expression for which code is actually generated. +/// @param Ty The type of the resulting code. +/// @param IP The insertion point for the new code. +llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE, + const llvm::DataLayout &DL, const char *Name, + const llvm::SCEV *E, llvm::Type *Ty, + llvm::Instruction *IP); } #endif Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -24,7 +24,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -116,17 +115,16 @@ VTV.insert(BBMap.begin(), BBMap.end()); VTV.insert(GlobalMap.begin(), GlobalMap.end()); NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); - SCEVExpander Expander(SE, Stmt.getParent() - ->getRegion() - .getEntry() - ->getParent() - ->getParent() - ->getDataLayout(), - "polly"); - assert(Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() && + + Scop &S = *Stmt.getParent(); + const DataLayout &DL = + S.getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + auto IP = Builder.GetInsertPoint(); + + assert(IP != Builder.GetInsertBlock()->end() && "Only instructions can be insert points for SCEVExpander"); - Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), - Builder.GetInsertPoint()); + Value *Expanded = + expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), IP); BBMap[Old] = Expanded; return Expanded; @@ -376,6 +374,7 @@ void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy) { + EscapeUserVectorTy EscapeUsers; for (User *U : Inst->users()) { Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -12,7 +12,7 @@ #include "polly/CodeGen/IslExprBuilder.h" #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "polly/Support/ScopHelper.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -148,8 +148,9 @@ break; const SCEV *DimSCEV = SAI->getDimensionSize(u - 1); - Value *DimSize = Expander.expandCodeFor(DimSCEV, DimSCEV->getType(), - Builder.GetInsertPoint()); + Value *DimSize = + expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + Builder.GetInsertPoint()); Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -31,7 +31,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -742,5 +741,6 @@ Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { Instruction *InsertLocation = --(Builder.GetInsertBlock()->end()); - return Rewriter.expandCodeFor(Expr, Expr->getType(), InsertLocation); + return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), + InsertLocation); } Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -17,12 +17,14 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +using namespace polly; #define DEBUG_TYPE "polly-scop-helper" @@ -252,3 +254,110 @@ // splitBlock updates DT, LI and RI. splitBlock(EntryBlock, I, DT, LI, RI); } + +/// The SCEVExpander will __not__ generate any code for an existing SDiv/SRem +/// instruction but just use it, if it is references as a SCEVUnknown. We want +/// however to generate new code if the instruction is in the analyzed region +/// and we generate code outside/infront of that region. Hence, we generate the +/// code for the SDiv/SRem operands in front of the analyzed region and then +/// create a new SDiv/SRem operation there too. +struct ScopExpander : SCEVVisitor { + friend struct SCEVVisitor; + + explicit ScopExpander(const Region &R, ScalarEvolution &SE, + const DataLayout &DL, const char *Name) + : Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R) {} + + Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) { + // If we generate code in the region we will immediately fall back to the + // SCEVExpander, otherwise we will stop at all unknowns in the SCEV and if + // needed replace them by copies computed in the entering block. + if (!R.contains(I)) + E = visit(E); + return Expander.expandCodeFor(E, Ty, I); + } + +private: + SCEVExpander Expander; + ScalarEvolution &SE; + const char *Name; + const Region &R; + + const SCEV *visitUnknown(const SCEVUnknown *E) { + Instruction *Inst = dyn_cast(E->getValue()); + if (!Inst || (Inst->getOpcode() != Instruction::SRem && + Inst->getOpcode() != Instruction::SDiv)) + return E; + + if (!R.contains(Inst)) + return E; + + Instruction *StartIP = R.getEnteringBlock()->getTerminator(); + + const SCEV *LHSScev = visit(SE.getSCEV(Inst->getOperand(0))); + const SCEV *RHSScev = visit(SE.getSCEV(Inst->getOperand(1))); + + Value *LHS = Expander.expandCodeFor(LHSScev, E->getType(), StartIP); + Value *RHS = Expander.expandCodeFor(RHSScev, E->getType(), StartIP); + + Inst = BinaryOperator::Create((Instruction::BinaryOps)Inst->getOpcode(), + LHS, RHS, Inst->getName() + Name, StartIP); + return SE.getSCEV(Inst); + } + + /// The following functions will just traverse the SCEV and rebuild it with + /// the new operands returned by the traversal. + /// + ///{ + const SCEV *visitConstant(const SCEVConstant *E) { return E; } + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *E) { + return SE.getTruncateExpr(visit(E->getOperand()), E->getType()); + } + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *E) { + return SE.getZeroExtendExpr(visit(E->getOperand()), E->getType()); + } + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *E) { + return SE.getSignExtendExpr(visit(E->getOperand()), E->getType()); + } + const SCEV *visitUDivExpr(const SCEVUDivExpr *E) { + return SE.getUDivExpr(visit(E->getLHS()), visit(E->getRHS())); + } + const SCEV *visitAddExpr(const SCEVAddExpr *E) { + SmallVector NewOps; + for (const SCEV *Op : E->operands()) + NewOps.push_back(visit(Op)); + return SE.getAddExpr(NewOps); + } + const SCEV *visitMulExpr(const SCEVMulExpr *E) { + SmallVector NewOps; + for (const SCEV *Op : E->operands()) + NewOps.push_back(visit(Op)); + return SE.getMulExpr(NewOps); + } + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *E) { + SmallVector NewOps; + for (const SCEV *Op : E->operands()) + NewOps.push_back(visit(Op)); + return SE.getUMaxExpr(NewOps); + } + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *E) { + SmallVector NewOps; + for (const SCEV *Op : E->operands()) + NewOps.push_back(visit(Op)); + return SE.getSMaxExpr(NewOps); + } + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) { + SmallVector NewOps; + for (const SCEV *Op : E->operands()) + NewOps.push_back(visit(Op)); + return SE.getAddRecExpr(NewOps, E->getLoop(), E->getNoWrapFlags()); + } + ///} +}; + +Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL, + const char *Name, const SCEV *E, Type *Ty, + Instruction *IP) { + ScopExpander Expander(S.getRegion(), SE, DL, Name); + return Expander.expandCodeFor(E, Ty, IP); +} Index: test/Isl/CodeGen/inner_scev_sdiv_1.ll =================================================================== --- test/Isl/CodeGen/inner_scev_sdiv_1.ll +++ test/Isl/CodeGen/inner_scev_sdiv_1.ll @@ -8,7 +8,8 @@ ; computation of the SCEV to before the scop that references %div44, which is ; not available then. ; -; XFAIL: * +; CHECK: polly.split_new_and_old: +; CHECK-NEXT: %div23.neg.polly.copy = sdiv i64 0, -4 ; target triple = "x86_64-unknown-linux-gnu" Index: test/Isl/CodeGen/inner_scev_sdiv_2.ll =================================================================== --- test/Isl/CodeGen/inner_scev_sdiv_2.ll +++ test/Isl/CodeGen/inner_scev_sdiv_2.ll @@ -1,11 +1,15 @@ ; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s | FileCheck %s -; XFAIL: * ; ; The SCEV expression in this test case refers to a sequence of sdiv ; instructions, which are part of different bbs in the SCoP. When code ; generating the parameter expressions, the code that is generated by the SCEV ; expander has still references to the in-scop instructions, which is invalid. - +; +; CHECK: polly.split_new_and_old: +; CHECK-NOT: = sdiv i64 0, -4 +; CHECK: %div43polly = sdiv i64 %param, 2 +; CHECK: %div44polly = sdiv i64 %div43polly, 2 +; target triple = "x86_64-unknown-linux-gnu" define void @_vorbis_apply_window(float* %d, i64 %param) { Index: test/Isl/CodeGen/inner_scev_sdiv_3.ll =================================================================== --- test/Isl/CodeGen/inner_scev_sdiv_3.ll +++ test/Isl/CodeGen/inner_scev_sdiv_3.ll @@ -1,14 +1,13 @@ ; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s | FileCheck %s -; XFAIL: * ; -; The SCEV expression in this test case refers to a sequence of sdiv -; instructions, which are part of different bbs in the SCoP. When code -; generating the parameter expressions, the code that is generated by the SCEV -; expander has still references to the in-scop instructions, which is invalid. - +; This test case has a inner SCEV sdiv that will escape the SCoP. Just check we +; do not crash and generate valid code. +; +; CHECK: polly.split_new_and_old: +; target triple = "x86_64-unknown-linux-gnu" -define void @_vorbis_apply_window(float* %d, i64 %param) { +define i64 @_vorbis_apply_window(float* %d, i64 %param) { entry: %0 = load float*, float** undef, align 8 %div23.neg = sdiv i64 0, -4 @@ -30,7 +29,11 @@ br label %for.body.51 for.cond.60.preheader: ; preds = %for.body.51, %for.cond.30.preheader - ret void + %div44.m = phi i64 [%div44, %for.body.51], [ 0, %for.cond.30.preheader] + br i1 true, label %end, label %for.cond.30.preheader + +end: + ret i64 %div44.m for.body.51: ; preds = %for.body.51, %for.body.51.lr.ph %indvars.iv86 = phi i64 [ %2, %for.body.51.lr.ph ], [ undef, %for.body.51 ] Index: test/Isl/CodeGen/inner_scev_sdiv_in_lb.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/inner_scev_sdiv_in_lb.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s --check-prefix=CODEGEN +; +; TODO: This is a negative test. +; +; Once we use isl to come up with loop bounds this should work +; and hopefully not break +; +; CHECK-NOT: Valid Region +; CODEGEN-NOT: polly +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) +; for (int j = 0; j < i / 3; j++) +; A[i] += A[j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb3 + +bb3: ; preds = %bb19, %bb + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %bb19 ], [ 0, %bb ] + %tmp4 = icmp slt i64 %indvars.iv1, %tmp + br i1 %tmp4, label %bb5, label %bb20 + +bb5: ; preds = %bb3 + br label %bb6 + +bb6: ; preds = %bb17, %bb5 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb17 ], [ 0, %bb5 ] + %tmp7 = trunc i64 %indvars.iv1 to i32 + %tmp8 = sdiv i32 %tmp7, 3 + %tmp9 = sext i32 %tmp8 to i64 + %tmp10 = icmp slt i64 %indvars.iv, %tmp9 + br i1 %tmp10, label %bb11, label %bb18 + +bb11: ; preds = %bb6 + %tmp12 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp15 = load i32, i32* %tmp14, align 4 + %tmp16 = add nsw i32 %tmp15, %tmp13 + store i32 %tmp16, i32* %tmp14, align 4 + br label %bb17 + +bb17: ; preds = %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb6 + +bb18: ; preds = %bb6 + br label %bb19 + +bb19: ; preds = %bb18 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %bb3 + +bb20: ; preds = %bb3 + ret void +} Index: test/Isl/CodeGen/inner_scev_sdiv_in_lb_invariant.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/inner_scev_sdiv_in_lb_invariant.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -S -polly-codegen -polly-no-early-exit < %s | FileCheck %s +; +; Check that this will not crash our code generation. +; +; CHECK: polly.start: +; +; void f(int *A, int N) { +; for (int i = 0; i < N / 4; i++) +; A[i] += A[i - 1]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +bb: + %tmp = sdiv i32 %N, 4 + %tmp2 = sext i32 %tmp to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %tmp2 + br i1 %tmp3, label %bb4, label %bb12 + +bb4: ; preds = %bb1 + %tmp5 = add nsw i64 %indvars.iv, -1 + %tmp6 = getelementptr inbounds i32, i32* %A, i64 %tmp5 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp9 = load i32, i32* %tmp8, align 4 + %tmp10 = add nsw i32 %tmp9, %tmp7 + store i32 %tmp10, i32* %tmp8, align 4 + br label %bb11 + +bb11: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/inner_scev_sdiv_in_rtc.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/inner_scev_sdiv_in_rtc.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s +; +; This will just check that we generate valid code here. +; +; CHECK: polly.start: +; +; void f(int *A, int *B) { +; for (int i = 0; i < 1024; i++) +; A[i % 3] = B[i / 42]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B, i32 %N) { +bb: + br label %bb1 + +bb1: ; preds = %bb9, %bb + %i.0 = phi i32 [ 0, %bb ], [ %tmp10, %bb9 ] + %exitcond = icmp ne i32 %i.0, %N + br i1 %exitcond, label %bb2, label %bb11 + +bb2: ; preds = %bb1 + %tmp = sdiv i32 %i.0, 42 + %tmp3 = sext i32 %tmp to i64 + %tmp4 = getelementptr inbounds i32, i32* %B, i64 %tmp3 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp6 = srem i32 %i.0, 3 + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = getelementptr inbounds i32, i32* %A, i64 %tmp7 + store i32 %tmp5, i32* %tmp8, align 4 + br label %bb9 + +bb9: ; preds = %bb2 + %tmp10 = add nuw nsw i32 %i.0, 1 + br label %bb1 + +bb11: ; preds = %bb1 + ret void +}