Index: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp @@ -498,15 +498,43 @@ return; // FIXME: Removing a store here can leave MemorySSA in an unoptimized state // by creating MemoryPhis that have identical arguments and by creating - // MemoryUses whose defining access is not an actual clobber. - if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) - MSSA->removeMemoryAccess(MA); + // MemoryUses whose defining access is not an actual clobber. We handle the + // phi case here, but the non-optimized MemoryUse case is not handled. Once + // MemorySSA tracks whether uses are optimized this will be taken care of on + // the MemorySSA side. + if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) { + // Optimize MemoryPhi nodes that may become redundant by having all the + // same input values once MA is removed. + SmallVector<MemoryPhi *, 4> PhisToCheck; + SmallVector<MemoryAccess *, 8> WorkQueue; + WorkQueue.push_back(MA); + // Process MemoryPhi nodes in FIFO order using an ever-growing vector since + // we shouldn't be processing that many phis and this will avoid an + // allocation in almost all cases. + for (unsigned I = 0; I < WorkQueue.size(); ++I) { + MemoryAccess *WI = WorkQueue[I]; + + for (auto *U : WI->users()) + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U)) + PhisToCheck.push_back(MP); + + MSSA->removeMemoryAccess(WI); + + for (MemoryPhi *MP : PhisToCheck) { + MemoryAccess *FirstIn = MP->getIncomingValue(0); + if (all_of(MP->incoming_values(), + [=](Use &In) { return In == FirstIn; })) + WorkQueue.push_back(MP); + } + PhisToCheck.clear(); + } + } } }; } -/// Determine if the memory referenced by LaterInst is from the same heap version -/// as EarlierInst. +/// Determine if the memory referenced by LaterInst is from the same heap +/// version as EarlierInst. 
/// This is currently called in two scenarios: /// /// load p @@ -536,11 +564,17 @@ // LaterInst, if LaterDef dominates EarlierInst then it can't occur between // EarlierInst and LaterInst and neither can any other write that potentially // clobbers LaterInst. - // FIXME: This is currently fairly expensive since it does an AA check even - // for MemoryUses that were already optimized by MemorySSA construction. - // Re-visit once MemorySSA optimized use tracking change has been committed. - MemoryAccess *LaterDef = - MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); + // FIXME: Use getClobberingMemoryAccess only for stores since it is currently + // fairly expensive to call on MemoryUses since it does an AA check even for + // MemoryUses that were already optimized by MemorySSA construction. Once + // MemorySSA optimized use tracking change has been committed we can use + // getClobberingMemoryAccess for MemoryUses as well. + MemoryAccess *LaterMA = MSSA->getMemoryAccess(LaterInst); + MemoryAccess *LaterDef; + if (auto *LaterUse = dyn_cast<MemoryUse>(LaterMA)) + LaterDef = LaterUse->getDefiningAccess(); + else + LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst)); } Index: llvm/trunk/test/Transforms/EarlyCSE/memoryssa.ll =================================================================== --- llvm/trunk/test/Transforms/EarlyCSE/memoryssa.ll +++ llvm/trunk/test/Transforms/EarlyCSE/memoryssa.ll @@ -32,3 +32,38 @@ store i32 %V1, i32* @G1 ret void } + +;; Check that memoryphi optimization happens during EarlyCSE, enabling +;; more load CSE opportunities. 
+; CHECK-LABEL: @test_memphiopt( +; CHECK-NOMEMSSA-LABEL: @test_memphiopt( +define void @test_memphiopt(i1 %c, i32* %p) { +; CHECK-LABEL: entry: +; CHECK-NOMEMSSA-LABEL: entry: +entry: +; CHECK: load +; CHECK-NOMEMSSA: load + %v1 = load i32, i32* @G1 + br i1 %c, label %then, label %end + +; CHECK-LABEL: then: +; CHECK-NOMEMSSA-LABEL: then: +then: +; CHECK: load +; CHECK-NOMEMSSA: load + %pv = load i32, i32* %p +; CHECK-NOT: store +; CHECK-NOMEMSSA-NOT: store + store i32 %pv, i32* %p + br label %end + +; CHECK-LABEL: end: +; CHECK-NOMEMSSA-LABEL: end: +end: +; CHECK-NOT: load +; CHECK-NOMEMSSA: load + %v2 = load i32, i32* @G1 + %sum = add i32 %v1, %v2 + store i32 %sum, i32* @G2 + ret void +}