Index: lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -181,6 +181,9 @@ addPass(createEarlyCSEPass()); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(createEarlyCSEPass()); } bool NVPTXPassConfig::addInstSelector() { Index: lib/Transforms/Scalar/NaryReassociate.cpp =================================================================== --- lib/Transforms/Scalar/NaryReassociate.cpp +++ lib/Transforms/Scalar/NaryReassociate.cpp @@ -234,6 +234,7 @@ BasicBlock *BB = Node->getBlock(); for (auto I = BB->begin(); I != BB->end(); ++I) { if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) { + const SCEV *OldSCEV = SE->getSCEV(I); if (Instruction *NewI = tryReassociate(I)) { Changed = true; SE->forgetValue(I); @@ -243,7 +244,28 @@ } // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. - SeenExprs[SE->getSCEV(I)].push_back(I); + const SCEV *NewSCEV = SE->getSCEV(I); + SeenExprs[NewSCEV].push_back(I); + // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) + // is equivalent to I. However, ScalarEvolution::getSCEV may + // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose + // we reassociate + // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4 + // to + // NewI = &a[sext(i)] + sext(j). + // + // ScalarEvolution computes + // getSCEV(I) = a + 4 * sext(i + j) + // getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j) + // which are different SCEVs. + // + // To alleviate this issue of ScalarEvolution not always capturing + // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can + // map both SCEV before and after tryReassociate(I) to I. + // + // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. + if (NewSCEV != OldSCEV) + SeenExprs[OldSCEV].push_back(I); } } } Index: test/Transforms/NaryReassociate/NVPTX/nary-gep.ll =================================================================== --- test/Transforms/NaryReassociate/NVPTX/nary-gep.ll +++ test/Transforms/NaryReassociate/NVPTX/nary-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-unknown-unknown" @@ -27,24 +27,37 @@ ; foo(&a[sext(j)]); ; foo(&a[sext(i +nsw j)]); +; foo(&a[sext((i +nsw j) +nsw i)]); ; => -; t = &a[sext(j)]; -; foo(t); -; foo(t + sext(i)); +; t1 = &a[sext(j)]; +; foo(t1); +; t2 = t1 + sext(i); +; foo(t2); +; t3 = t2 + sext(i); // sext(i) should be GVN'ed. +; foo(t3); define void @reassociate_gep_nsw(float* %a, i32 %i, i32 %j) { ; CHECK-LABEL: @reassociate_gep_nsw( - %1 = add nsw i32 %i, %j - %idxprom.1 = sext i32 %1 to i64 %idxprom.j = sext i32 %j to i64 - %2 = getelementptr float, float* %a, i64 %idxprom.j + %1 = getelementptr float, float* %a, i64 %idxprom.j ; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j - call void @foo(float* %2) + call void @foo(float* %1) ; CHECK: call void @foo(float* [[t1]]) - %3 = getelementptr float, float* %a, i64 %idxprom.1 + + %2 = add nsw i32 %i, %j + %idxprom.2 = sext i32 %2 to i64 + %3 = getelementptr float, float* %a, i64 %idxprom.2 ; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64 ; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]] call void @foo(float* %3) ; CHECK: call void @foo(float* [[t2]]) + + %4 = add nsw i32 %2, %i + %idxprom.4 = sext i32 %4 to i64 + %5 = getelementptr float, float* %a, i64 %idxprom.4 +; CHECK: [[t3:[^ ]+]] = getelementptr float, float* [[t2]], i64 [[sexti]] + call void @foo(float* %5) +; CHECK: call void @foo(float* [[t3]]) + ret void }