Index: polly/trunk/include/polly/ZoneAlgo.h =================================================================== --- polly/trunk/include/polly/ZoneAlgo.h +++ polly/trunk/include/polly/ZoneAlgo.h @@ -137,6 +137,13 @@ void addArrayReadAccess(MemoryAccess *MA); + /// Return the ValInst written by a (must-)write access. Returns a null map + /// (treated as 'unknown' by the caller) if there is no single ValInst[] the + /// array element written to will have. + /// + /// @return { Domain[] -> ValInst[] } + isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel); + void addArrayWriteAccess(MemoryAccess *MA); protected: Index: polly/trunk/lib/Transform/ZoneAlgo.cpp =================================================================== --- polly/trunk/lib/Transform/ZoneAlgo.cpp +++ polly/trunk/lib/Transform/ZoneAlgo.cpp @@ -363,17 +363,6 @@ continue; } - if (!isa(MA->getAccessInstruction())) { - DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n"); - OptimizationRemarkMissed R(PassName, "UnusualStore", - MA->getAccessInstruction()); - R << "encountered write that is not a StoreInst: " - << printInstruction(MA->getAccessInstruction()); - S->getFunction().getContext().diagnose(R); - - IncompatibleElts = IncompatibleElts.add_set(ArrayElts); - } - // In region statements the order is less clear, eg. the load and store // might be in a boxed loop. if (Stmt->isRegionStmt() && @@ -432,6 +421,37 @@ } } +isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) { + if (!MA->isMustWrite()) + return {}; + + Value *AccVal = MA->getAccessValue(); + ScopStmt *Stmt = MA->getStatement(); + Instruction *AccInst = MA->getAccessInstruction(); + + // Write a value to a single element. + auto L = MA->isOriginalArrayKind() ? 
LI->getLoopFor(AccInst->getParent()) + : Stmt->getSurroundingLoop(); + if (AccVal && + AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType() && + AccRel.is_single_valued()) + return makeValInst(AccVal, Stmt, L); + + // memset(_, 0, _) is equivalent to writing the null value to all touched + // elements. isMustWrite() ensures that all of an element's bytes are + // overwritten. + if (auto *Memset = dyn_cast(AccInst)) { + auto *WrittenConstant = dyn_cast(Memset->getValue()); + Type *Ty = MA->getLatestScopArrayInfo()->getElementType(); + if (WrittenConstant && WrittenConstant->isZeroValue()) { + Constant *Zero = Constant::getNullValue(Ty); + return makeValInst(Zero, Stmt, L); + } + } + + return {}; +} + void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) { assert(MA->isLatestArrayKind()); assert(MA->isWrite()); @@ -449,10 +469,9 @@ give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy())); // { Domain[] -> ValInst[] } - auto WriteValInstance = - makeValInst(MA->getAccessValue(), Stmt, - LI->getLoopFor(MA->getAccessInstruction()->getParent()), - MA->isMustWrite()); + auto WriteValInstance = getWrittenValue(MA, AccRel); + if (!WriteValInstance) + WriteValInstance = makeUnknownForDomain(Stmt); // { Domain[] -> [Element[] -> Domain[]] } auto IncludeElement = give(isl_map_curry(isl_map_domain_map(AccRel.copy()))); @@ -698,8 +717,6 @@ for (auto *MA : Stmt) { if (!MA->isLatestArrayKind()) continue; - if (!isCompatibleAccess(MA)) - continue; if (MA->isRead()) addArrayReadAccess(MA); Index: polly/trunk/test/DeLICM/map_memset_zero.ll =================================================================== --- polly/trunk/test/DeLICM/map_memset_zero.ll +++ polly/trunk/test/DeLICM/map_memset_zero.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck -match-full-lines %s +; +; Check that PHI mapping works even in presence of a memset whose +; zero value is used. 
+; +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define void @func(i8* noalias nonnull %A) { +entry: + br label %outer.for + +outer.for: + %j = phi i32 [0, %entry], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %bodyA, label %outer.exit + + + bodyA: + %A_idx = getelementptr inbounds i8, i8* %A, i32 %j + %cond = icmp eq i32 21, 21 + br i1 %cond, label %bodyB, label %bodyC + + bodyB: + call void @llvm.memset.p0i8.i64(i8* %A_idx, i8 0, i64 1, i32 1, i1 false) + br label %bodyC + + bodyC: + %phi = phi i8 [1, %bodyA], [0, %bodyB] + %a = load i8, i8* %A_idx + store i8 %phi, i8* %A_idx + br label %outer.inc + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Compatible overwrites: 1 +; CHECK: Overwrites mapped to: 1 +; CHECK: PHI scalars mapped: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_bodyA[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_bodyA[i0] -> MemRef_A[o0] : 1 = 0 }; +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_bodyB[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: Stmt_bodyC +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_bodyC[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } Index: 
polly/trunk/test/DeLICM/reject_unusualstore.ll =================================================================== --- polly/trunk/test/DeLICM/reject_unusualstore.ll +++ polly/trunk/test/DeLICM/reject_unusualstore.ll @@ -1,8 +1,10 @@ -; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s +; RUN: opt %loadPolly -polly-delicm -analyze< %s | FileCheck %s +; RUN: opt %loadPolly -polly-delicm -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STATS ; ; void func(double *A) { ; for (int j = 0; j < 2; j += 1) { /* outer */ -; memset(A[j], 0, sizeof(double)); +; A[j] = 21.0; +; A[j] = 42.0; ; double phi = 0.0; ; for (int i = 0; i < 4; i += 1) /* reduction */ ; phi += 4.2; @@ -11,8 +13,6 @@ ; } ; -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) - define void @func(double* noalias nonnull %A) { entry: br label %outer.preheader @@ -28,8 +28,8 @@ reduction.preheader: %A_idx = getelementptr inbounds double, double* %A, i32 %j - %tmp = bitcast double* %A_idx to i8* - call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false) + store double 21.0, double* %A_idx + store double 42.0, double* %A_idx br label %reduction.for reduction.for: @@ -68,4 +68,5 @@ } -; CHECK: encountered write that is not a StoreInst: call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false) +; CHECK: No modification has been made +; STATS: 1 polly-zone - Number of not zone-analyzable arrays Index: polly/trunk/test/ForwardOpTree/forward_load_memset_after.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_load_memset_after.ll +++ polly/trunk/test/ForwardOpTree/forward_load_memset_after.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Rematerialize a load in the presence of a non-store WRITE access. 
+; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double val = B[j]; +; +; bodyB: +; A[j] = val; +; +; bodyC: +; memset(A, 0, 16); +; memset(B, 0, 16); +; } +; + +declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1) + +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + br label %bodyB + + bodyB: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %bodyC + + bodyC: + call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false) + call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false) + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } Index: polly/trunk/test/ForwardOpTree/forward_load_memset_before.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_load_memset_before.ll +++ polly/trunk/test/ForwardOpTree/forward_load_memset_before.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Rematerialize a load in the presence of a non-store WRITE access. 
+; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; memset(A, 0, 16); +; memset(B, 0, 16); +; +; bodyB: +; double val = B[j]; +; +; bodyC: +; A[j] = val; +; } +; + +declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1) + +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false) + call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false) + br label %bodyB + + bodyB: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + br label %bodyC + + bodyC: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK-NEXT: After statements { +; CHECK: Stmt_bodyB +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[o0] : 8i0 <= o0 <= 7 + 8i0 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyC +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[8i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[o0] : 8i0 <= o0 <= 7 + 8i0 }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, 
double* %B_idx +; CHECK-NEXT: store double %val, double* %A_idx +; CHECK-NEXT: } +; CHECK-NEXT: }