Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -199,6 +199,9 @@ /// @brief Return the access instruction of this memory access. Instruction *getAccessInstruction() const { return Inst; } + /// @brief Return the element type of this memory access. + class Type *getElementType() const; + /// @brief Get the new access function imported from JSCOP file isl_map *getNewAccessRelation() const; Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -616,6 +616,15 @@ newAccessRelation = newAccess; } +Type *MemoryAccess::getElementType() const { + Instruction *Inst = getAccessInstruction(); + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + return LI->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return SI->getValueOperand()->getType(); + llvm_unreachable("Unknown access instruction type"); +} + //===----------------------------------------------------------------------===// isl_map *ScopStmt::getScattering() const { return isl_map_copy(Scattering); } Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -555,10 +555,52 @@ } void IslNodeBuilder::addMemoryAccesses(Scop &S) { + SCEVExpander Expander(SE, "polly"); + const Region &R = S.getRegion(); + for (ScopStmt *Stmt : S) for (MemoryAccess *MA : *Stmt) { isl_id *Id = MA->getArrayId(); - IDToValue[Id] = MA->getBaseAddr(); + if (IDToValue.count(Id)) { + isl_id_free(Id); + continue; + } + + // Hoist indirect loads into the current/entering block. + // This is a needed optimization because e.g., runtime alias + // checks need the loaded pointer value. It is sound because + // the loads are (or have to be) invariant in the region. 
+ Value *BaseAddr = MA->getBaseAddr(); + if (Instruction *BaseInst = dyn_cast<Instruction>(BaseAddr)) { + if (R.contains(BaseInst)) { + assert(isa<LoadInst>(BaseInst) && + "Indirect access should always be a load!"); + + Value *OldPtr = getPointerOperand(*BaseInst); + Value *NewPtr = Expander.expandCodeFor( + SE.getSCEV(OldPtr), OldPtr->getType(), Builder.GetInsertPoint()); + + BaseAddr = Builder.CreateLoad(NewPtr); + BaseInst->replaceAllUsesWith(BaseAddr); + BaseInst->eraseFromParent(); + + if (Instruction *OldPtrInst = dyn_cast<Instruction>(OldPtr)) { + OldPtrInst->replaceAllUsesWith(NewPtr); + OldPtrInst->eraseFromParent(); + } + } + } + + // Cast the base address we use for the IslExprBuilder to a pointer + // of the final type. This is necessary because ScalarEvolution already + // gave us the access offsets in units of the final type regardless of + // the actual underlying type (e.g., an array type with differently sized + // elements). + Type *MAElemType = MA->getElementType(); + if (BaseAddr->getType()->getPointerElementType() != MAElemType) + BaseAddr = Builder.CreateBitCast(BaseAddr, MAElemType->getPointerTo()); + + IDToValue[Id] = BaseAddr; isl_id_free(Id); } } Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -103,7 +103,7 @@ assert(isl_ast_expr_get_op_n_arg(Expr) == 2 && "Multidimensional access functions are not supported yet"); - Value *Base, *IndexOp, *Zero, *Access; + Value *Base, *IndexOp, *Access; SmallVector<Value *, 4> Indices; Type *PtrElTy; @@ -113,23 +113,10 @@ IndexOp = create(isl_ast_expr_get_op_arg(Expr, 1)); assert(IndexOp->getType()->isIntegerTy() && "Access index should be an integer"); - Zero = ConstantInt::getNullValue(IndexOp->getType()); - - // If base is a array type like, - // int A[N][M][K]; - // we have to adjust the GEP. 
The easiest way is to transform accesses like, - // A[i][j][k] - // into equivalent ones like, - // A[0][0][ i*N*M + j*M + k] - // because SCEV already folded the "peudo dimensions" into one. Thus our index - // operand will be 'i*N*M + j*M + k' anyway. - PtrElTy = Base->getType()->getPointerElementType(); - while (PtrElTy->isArrayTy()) { - Indices.push_back(Zero); - PtrElTy = PtrElTy->getArrayElementType(); - } Indices.push_back(IndexOp); + + PtrElTy = Base->getType()->getPointerElementType(); assert((PtrElTy->isIntOrIntVectorTy() || PtrElTy->isFPOrFPVectorTy() || PtrElTy->isPtrOrPtrVectorTy()) && "We do not yet change the type of the access base during code " Index: test/Isl/CodeGen/MemAccess/codegen_constant_offset.ll =================================================================== --- test/Isl/CodeGen/MemAccess/codegen_constant_offset.ll +++ test/Isl/CodeGen/MemAccess/codegen_constant_offset.ll @@ -40,4 +40,4 @@ for.end: ; preds = %for.cond ret i32 0 } -; CHECK: load i32* getelementptr inbounds ([100 x i32]* @A, i64 0, i64 10) +; CHECK: load i32* getelementptr inbounds ([100 x i32]* @A, i{{(32|64)}} 0, i{{(32|64)}} 10) Index: test/Isl/CodeGen/MemAccess/codegen_simple.ll =================================================================== --- test/Isl/CodeGen/MemAccess/codegen_simple.ll +++ test/Isl/CodeGen/MemAccess/codegen_simple.ll @@ -40,4 +40,4 @@ for.end: ; preds = %for.cond ret i32 0 } -; CHECK: load i32* getelementptr inbounds ([100 x i32]* @A, i64 0, i64 0) +; CHECK: load i32* getelementptr inbounds ([100 x i32]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0) Index: test/Isl/CodeGen/MemAccess/codegen_simple_float.ll =================================================================== --- test/Isl/CodeGen/MemAccess/codegen_simple_float.ll +++ test/Isl/CodeGen/MemAccess/codegen_simple_float.ll @@ -38,4 +38,4 @@ for.end: ; preds = %for.cond ret i32 0 } -; CHECK: load float* getelementptr inbounds ([100 x float]* @A, i64 0, i64 0) +; CHECK: load float* getelementptr 
inbounds ([100 x float]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0) Index: test/Isl/CodeGen/MemAccess/codegen_simple_md.ll =================================================================== --- test/Isl/CodeGen/MemAccess/codegen_simple_md.ll +++ test/Isl/CodeGen/MemAccess/codegen_simple_md.ll @@ -63,7 +63,7 @@ ; WITHCONST: %[[MUL2:[._a-zA-Z0-9]+]] = mul nsw i64 2, %[[IVIn]] ; WITHCONST: %[[SUM1:[._a-zA-Z0-9]+]] = add nsw i64 %[[MUL1]], %[[MUL2]] ; WITHCONST: %[[SUM2:[._a-zA-Z0-9]+]] = add nsw i64 %[[SUM1]], 5 -; WITHCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr [1040 x i32]* @A, i64 0, i64 %[[SUM2]] +; WITHCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr i32* getelementptr inbounds ([1040 x i32]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0), i{{(32|64)}} %[[SUM2]] ; WITHCONST: store i32 100, i32* %[[ACC]] ; WITHOUTCONST: %[[IVOut:polly.indvar[0-9]*]] = phi i64 [ 0, %polly.loop_preheader{{[0-9]*}} ], [ %polly.indvar_next{{[0-9]*}}, %polly.{{[._a-zA-Z0-9]*}} ] @@ -71,5 +71,5 @@ ; WITHOUTCONST: %[[MUL1:[._a-zA-Z0-9]+]] = mul nsw i64 16, %[[IVOut]] ; WITHOUTCONST: %[[MUL2:[._a-zA-Z0-9]+]] = mul nsw i64 2, %[[IVIn]] ; WITHOUTCONST: %[[SUM1:[._a-zA-Z0-9]+]] = add nsw i64 %[[MUL1]], %[[MUL2]] -; WITHOUTCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr [1040 x i32]* @A, i64 0, i64 %[[SUM1]] +; WITHOUTCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr i32* getelementptr inbounds ([1040 x i32]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0), i{{(32|64)}} %[[SUM1]] ; WITHOUTCONST: store i32 100, i32* %[[ACC]] Index: test/Isl/CodeGen/MemAccess/codegen_simple_md_float.ll =================================================================== --- test/Isl/CodeGen/MemAccess/codegen_simple_md_float.ll +++ test/Isl/CodeGen/MemAccess/codegen_simple_md_float.ll @@ -59,7 +59,7 @@ ; WITHCONST: %[[MUL2:[._a-zA-Z0-9]+]] = mul nsw i64 2, %[[IVIn]] ; WITHCONST: %[[SUM1:[._a-zA-Z0-9]+]] = add nsw i64 %[[MUL1]], %[[MUL2]] ; WITHCONST: %[[SUM2:[._a-zA-Z0-9]+]] = add nsw i64 %[[SUM1]], 5 -; WITHCONST: %[[ACC:[._a-zA-Z0-9]*]] = 
getelementptr [1040 x float]* @A, i64 0, i64 %[[SUM2]] +; WITHCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr float* getelementptr inbounds ([1040 x float]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0), i{{(32|64)}} %[[SUM2]] ; WITHCONST: store float 1.000000e+02, float* %[[ACC]] ; WITHOUTCONST: %[[IVOut:polly.indvar[0-9]*]] = phi i64 [ 0, %polly.loop_preheader{{[0-9]*}} ], [ %polly.indvar_next{{[0-9]*}}, %polly.{{[._a-zA-Z0-9]*}} ] @@ -67,5 +67,5 @@ ; WITHOUTCONST: %[[MUL1:[._a-zA-Z0-9]+]] = mul nsw i64 16, %[[IVOut]] ; WITHOUTCONST: %[[MUL2:[._a-zA-Z0-9]+]] = mul nsw i64 2, %[[IVIn]] ; WITHOUTCONST: %[[SUM1:[._a-zA-Z0-9]+]] = add nsw i64 %[[MUL1]], %[[MUL2]] -; WITHOUTCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr [1040 x float]* @A, i64 0, i64 %[[SUM1]] +; WITHOUTCONST: %[[ACC:[._a-zA-Z0-9]*]] = getelementptr float* getelementptr inbounds ([1040 x float]* @A, i{{(32|64)}} 0, i{{(32|64)}} 0), i{{(32|64)}} %[[SUM1]] ; WITHOUTCONST: store float 1.000000e+02, float* %[[ACC]] Index: test/Isl/CodeGen/aliasing_indirect_accesses.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/aliasing_indirect_accesses.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -basicaa -polly-code-generator=isl -polly-codegen-isl -S < %s | FileCheck %s +; +; Check that we hoist the load of %C out of the whole +; region into the entering block (here entry). This is necessary +; because we might create alias checks involving the loaded address. +; It is sound since we ensured (during ScopDetection) that *C is invariant +; in the whole region. 
+; +; CHECK: (i8* %B, i8* noalias %C) +; CHECK: entry: +; CHECK: %0 = bitcast i8* %C to i16** +; CHECK-NEXT: %1 = load i16** %0 +; CHECK-NOT: {{(%C|%0)}} +; +; void jd(char *B, void *C) { +; for (int i = 0; i < 1024; i++) +; (*(short **)C)[i] = B[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i8* %B, i8* noalias %C) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i8* %B, i64 %indvars.iv + %tmp = load i8* %arrayidx, align 1 + %conv = sext i8 %tmp to i16 + %tmp1 = bitcast i8* %C to i16** + %tmp2 = load i16** %tmp1, align 8 + %arrayidx2 = getelementptr inbounds i16* %tmp2, i64 %indvars.iv + store i16 %conv, i16* %arrayidx2, align 2 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/aliasing_struct_elements.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/aliasing_struct_elements.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -S < %s | FileCheck %s +; +; Check that we cast struct s *S to short * before we map it to the MemRef_S +; because the IslExprBuilder cannot do that later on. Note that the offset we +; use as maximal access (here 1036) is actually in "short" units, thus the +; cast is sound. 
+; +; CHECK: %[[Cast:[._0-9a-zA-Z]*]] = bitcast %struct.s* %S to i16* +; CHECK-NEXT: %polly.access.{{[._0-9a-zA-Z]*}} = getelementptr i16* %[[Cast]], i64 1036 +; +; struct s { +; short A[2]; +; long puffer0[2]; +; short B[1024]; +; }; +; +; void jd(struct s *S, long *E) { +; for (int i = 0; i < 1024; i++) +; E[i] = S->A[i] + S->B[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.s = type { [2 x i16], [2 x i64], [1024 x i16] } + +define void @jd(%struct.s* %S, i64* %E) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds %struct.s* %S, i64 0, i32 0, i64 %indvars.iv + %tmp = load i16* %arrayidx, align 2 + %conv = sext i16 %tmp to i64 + %arrayidx2 = getelementptr inbounds %struct.s* %S, i64 0, i32 2, i64 %indvars.iv + %tmp1 = load i16* %arrayidx2, align 2 + %conv3 = sext i16 %tmp1 to i64 + %add = add nsw i64 %conv, %conv3 + %arrayidx6 = getelementptr inbounds i64* %E, i64 %indvars.iv + store i64 %add, i64* %arrayidx6, align 8 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/aliasing_struct_elements_float.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/aliasing_struct_elements_float.ll @@ -0,0 +1,49 @@ +; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -S < %s | FileCheck %s +; +; Check that we cast struct s *S to double * before we map it to the MemRef_S +; because the IslExprBuilder cannot do that later on. Note that the offset we +; use as maximal access (here 1027) is actually in "double" units, thus the +; cast is sound. 
+; +; CHECK: %[[Cast:[._0-9a-zA-Z]*]] = bitcast %struct.s* %S to double* +; CHECK-NEXT: %polly.access.{{[._0-9a-zA-Z]*}} = getelementptr double* %[[Cast]], i64 1027 +; +; struct s { +; short A[2]; +; long puffer[2]; +; double B[1024]; +; }; +; +; void jd(struct s *S, long *E) { +; for (int i = 0; i < 1024; i++) +; E[i] = S->B[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.s = type { [2 x i16], [2 x i64], [1024 x double] } + +define void @jd(%struct.s* %S, i64* %E) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds %struct.s* %S, i64 0, i32 2, i64 %indvars.iv + %tmp = load double* %arrayidx, align 8 + %conv = fptosi double %tmp to i64 + %arrayidx2 = getelementptr inbounds i64* %E, i64 %indvars.iv + store i64 %conv, i64* %arrayidx2, align 8 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}