Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -133,7 +133,7 @@ void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM); /** See llvm::createEarlyCSEPass function */ -void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM); +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA); /** See llvm::createLowerExpectIntrinsicPass function */ void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -119,6 +119,7 @@ void initializeDominatorTreeWrapperPassPass(PassRegistry&); void initializeDwarfEHPreparePass(PassRegistry&); void initializeEarlyCSELegacyPassPass(PassRegistry &); +void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &); void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEfficiencySanitizerPass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -322,7 +322,7 @@ // EarlyCSE - This pass performs a simple and fast CSE pass over the dominator // tree. // -FunctionPass *createEarlyCSEPass(); +FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false); //===----------------------------------------------------------------------===// // Index: include/llvm/Transforms/Scalar/EarlyCSE.h =================================================================== --- include/llvm/Transforms/Scalar/EarlyCSE.h +++ include/llvm/Transforms/Scalar/EarlyCSE.h @@ -27,8 +27,12 @@ /// cases so that instcombine and other passes are more effective. It is /// expected that a later pass of GVN will catch the interesting/hard cases. 
struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> { + EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {} + /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + bool UseMemorySSA; }; } Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -139,7 +139,8 @@ FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("dse", DSEPass()) -FUNCTION_PASS("early-cse", EarlyCSEPass()) +FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false)) +FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true)) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("instsimplify", InstSimplifierPass()) Index: lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- lib/Transforms/Scalar/EarlyCSE.cpp +++ lib/Transforms/Scalar/EarlyCSE.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/MemorySSA.h" #include <deque> using namespace llvm; using namespace llvm::PatternMatch; @@ -251,6 +252,7 @@ const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + MemorySSA *MSSA; typedef RecyclingAllocator< BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy; typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>, @@ -312,8 +314,8 @@ /// \brief Set up the EarlyCSE runner for a particular function. 
EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} + DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {} bool run(); @@ -487,9 +489,64 @@ return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst), ExpectedType); } + + bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration, + Instruction *EarlierInst, Instruction *LaterInst); + + void removeMSSA(Instruction *Inst) { + if (!MSSA) + return; + if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) + MSSA->removeMemoryAccess(MA); + } }; } +/// Determine if the memory referenced by LaterInst is from the same heap version +/// as EarlierInst. +/// This is currently called in two scenarios: +/// +/// load p +/// ... +/// load p +/// +/// and +/// +/// x = load p +/// ... +/// store x, p +/// +/// in both cases we want to verify that there are no possible writes to the +/// memory referenced by p between the earlier and later instruction. +bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration, + unsigned LaterGeneration, + Instruction *EarlierInst, + Instruction *LaterInst) { + // Check the simple memory generation tracking first. + if (EarlierGeneration == LaterGeneration) + return true; + + if (!MSSA) + return false; + + // MemorySSA keeps no access for an instruction it has determined does not + // read or write memory. Nothing can clobber such an instruction, and the + // queries below need non-null accesses, so treat it as the same generation. + MemoryAccess *EarlierMA = MSSA->getMemoryAccess(EarlierInst); + if (!EarlierMA) + return true; + if (!MSSA->getMemoryAccess(LaterInst)) + return true; + + // Since we know LaterDef dominates LaterInst and EarlierInst dominates + // LaterInst, if LaterDef dominates EarlierInst then it can't occur between + // EarlierInst and LaterInst and neither can any other write that potentially + // clobbers LaterInst. 
+ MemoryAccess *LaterDef = + MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); + return MSSA->dominates(LaterDef, EarlierMA); +} + bool EarlyCSE::processNode(DomTreeNode *Node) { bool Changed = false; BasicBlock *BB = Node->getBlock(); @@ -547,6 +595,7 @@ // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst, &TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; ++NumSimplify; @@ -601,6 +650,7 @@ Changed = true; } if (isInstructionTriviallyDead(Inst, &TLI)) { + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; Killed = true; @@ -619,6 +669,7 @@ if (auto *I = dyn_cast<Instruction>(V)) I->andIRFlags(Inst); Inst->replaceAllUsesWith(V); + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; ++NumCSE; @@ -650,18 +701,21 @@ // load we're CSE'ing _to_ does. LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.DefInst != nullptr && - (InVal.Generation == CurrentGeneration || InVal.IsInvariant) && InVal.MatchingId == MemInst.getMatchingId() && // We don't yet handle removing loads with ordering of any kind. !MemInst.isVolatile() && MemInst.isUnordered() && // We can't replace an atomic load with one which isn't also atomic. - InVal.IsAtomic >= MemInst.isAtomic()) { + InVal.IsAtomic >= MemInst.isAtomic() && + (InVal.IsInvariant || + isSameMemGeneration(InVal.Generation, CurrentGeneration, + InVal.DefInst, Inst))) { Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " << *InVal.DefInst << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(Op); + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; ++NumCSELoad; @@ -692,11 +746,14 @@ // If we have an available version of this call, and if it is the right // generation, replace this instruction. 
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst); - if (InVal.first != nullptr && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && + isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first, + Inst)) { DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; ++NumCSECall; @@ -729,15 +786,22 @@ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.DefInst && InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) && - InVal.Generation == CurrentGeneration && InVal.MatchingId == MemInst.getMatchingId() && // We don't yet handle removing stores with ordering of any kind. - !MemInst.isVolatile() && MemInst.isUnordered()) { + !MemInst.isVolatile() && MemInst.isUnordered() && + isSameMemGeneration(InVal.Generation, CurrentGeneration, + InVal.DefInst, Inst)) { + // It is okay to have a LastStore to a different pointer here if MemorySSA + // tells us that the load and store are from the same memory generation. + // In that case, LastStore should keep its present value since we're + // removing the current store. 
assert((!LastStore || ParseMemoryInst(LastStore, TTI).getPointerOperand() == - MemInst.getPointerOperand()) && - "can't have an intervening store!"); + MemInst.getPointerOperand() || + MSSA) && + "can't have an intervening store if not using MemorySSA!"); DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n'); + removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; ++NumDSE; @@ -769,6 +833,7 @@ if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << *Inst << '\n'); + removeMSSA(LastStore); LastStore->eraseFromParent(); Changed = true; ++NumDSE; @@ -865,8 +930,10 @@ auto &TTI = AM.getResult<TargetIRAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto *MSSA = + UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -876,6 +943,8 @@ PreservedAnalyses PA; PA.preserve<DominatorTreeAnalysis>(); PA.preserve<GlobalsAA>(); + if (UseMemorySSA) + PA.preserve<MemorySSAAnalysis>(); return PA; } @@ -887,12 +956,16 @@ /// canonicalize things as it goes. It is intended to be fast and catch obvious /// cases so that instcombine and other passes are more effective. It is /// expected that a later pass of GVN will catch the interesting/hard cases. 
-class EarlyCSELegacyPass : public FunctionPass { +template<bool UseMemorySSA> +class EarlyCSELegacyCommonPass : public FunctionPass { public: static char ID; - EarlyCSELegacyPass() : FunctionPass(ID) { - initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry()); + EarlyCSELegacyCommonPass() : FunctionPass(ID) { + if (UseMemorySSA) + initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry()); + else + initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -903,8 +976,10 @@ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto *MSSA = + UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); return CSE.run(); } @@ -914,15 +989,20 @@ AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + if (UseMemorySSA) { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } AU.addPreserved<GlobalsAAWrapperPass>(); AU.setPreservesCFG(); } }; } -char EarlyCSELegacyPass::ID = 0; +using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>; -FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); } +template<> +char EarlyCSELegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) @@ -931,3 +1011,26 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) + +using EarlyCSEMemSSALegacyPass = + EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>; + +template<> +char EarlyCSEMemSSALegacyPass::ID = 0; + +FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) { + if (UseMemorySSA) + return new EarlyCSEMemSSALegacyPass(); + else + return new EarlyCSELegacyPass(); +} + +INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa", + "Early CSE w/ MemorySSA", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa", + "Early CSE w/ MemorySSA", false, false) Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -44,6 +44,7 @@ initializeGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); + initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); @@ -233,8 +234,8 @@ unwrap(PM)->add(createCorrelatedValuePropagationPass()); } -void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createEarlyCSEPass()); +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA) { + unwrap(PM)->add(createEarlyCSEPass(UseMemorySSA)); } void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) { Index: test/Transforms/EarlyCSE/AArch64/intrinsics.ll =================================================================== --- test/Transforms/EarlyCSE/AArch64/intrinsics.ll +++ test/Transforms/EarlyCSE/AArch64/intrinsics.ll @@ -1,5 +1,7 @@ ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -basicaa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes=early-cse-memssa | FileCheck %s define <4 x i32> @test_cse(i32* %a, [2 x <4 x i32>] %s.coerce, i32 %n) { entry: Index: test/Transforms/EarlyCSE/AArch64/ldstN.ll 
=================================================================== --- test/Transforms/EarlyCSE/AArch64/ldstN.ll +++ test/Transforms/EarlyCSE/AArch64/ldstN.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" Index: test/Transforms/EarlyCSE/atomics.ll =================================================================== --- test/Transforms/EarlyCSE/atomics.ll +++ test/Transforms/EarlyCSE/atomics.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s ; CHECK-LABEL: @test12( define i32 @test12(i1 %B, i32* %P1, i32* %P2) { Index: test/Transforms/EarlyCSE/basic.ll =================================================================== --- test/Transforms/EarlyCSE/basic.ll +++ test/Transforms/EarlyCSE/basic.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes=early-cse | FileCheck %s declare void @llvm.assume(i1) nounwind Index: test/Transforms/EarlyCSE/commute.ll =================================================================== --- test/Transforms/EarlyCSE/commute.ll +++ test/Transforms/EarlyCSE/commute.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s ; CHECK-LABEL: @test1( define void @test1(float %A, float %B, float* %PA, float* %PB) { Index: test/Transforms/EarlyCSE/conditional.ll =================================================================== --- test/Transforms/EarlyCSE/conditional.ll +++ test/Transforms/EarlyCSE/conditional.ll @@ -1,4 +1,5 @@ ; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s ; Can we CSE a known condition to a constant? 
define i1 @test(i8* %p) { Index: test/Transforms/EarlyCSE/edge.ll =================================================================== --- test/Transforms/EarlyCSE/edge.ll +++ test/Transforms/EarlyCSE/edge.ll @@ -1,4 +1,5 @@ ; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s ; Same as GVN/edge.ll, but updated to reflect EarlyCSE's less powerful ; implementation. EarlyCSE currently doesn't exploit equality comparisons ; against constants. Index: test/Transforms/EarlyCSE/fence.ll =================================================================== --- test/Transforms/EarlyCSE/fence.ll +++ test/Transforms/EarlyCSE/fence.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s ; NOTE: This file is testing the current implementation. Some of ; the transforms used as negative tests below would be legal, but ; only if reached through a chain of logic which EarlyCSE is incapable Index: test/Transforms/EarlyCSE/flags.ll =================================================================== --- test/Transforms/EarlyCSE/flags.ll +++ test/Transforms/EarlyCSE/flags.ll @@ -1,4 +1,5 @@ ; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s declare void @use(i1) Index: test/Transforms/EarlyCSE/floatingpoint.ll =================================================================== --- test/Transforms/EarlyCSE/floatingpoint.ll +++ test/Transforms/EarlyCSE/floatingpoint.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars). 
define <4 x float> @fV( <4 x float> %a) { Index: test/Transforms/EarlyCSE/guards.ll =================================================================== --- test/Transforms/EarlyCSE/guards.ll +++ test/Transforms/EarlyCSE/guards.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s declare void @llvm.experimental.guard(i1,...) Index: test/Transforms/EarlyCSE/instsimplify-dom.ll =================================================================== --- test/Transforms/EarlyCSE/instsimplify-dom.ll +++ test/Transforms/EarlyCSE/instsimplify-dom.ll @@ -1,4 +1,5 @@ ; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s ; PR12231 declare i32 @f() Index: test/Transforms/EarlyCSE/invariant-loads.ll =================================================================== --- test/Transforms/EarlyCSE/invariant-loads.ll +++ test/Transforms/EarlyCSE/invariant-loads.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOMEMSSA +; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MEMSSA declare void @clobber_and_use(i32) @@ -38,13 +39,19 @@ define void @f_2(i32* %ptr) { ; Negative test -- we can't forward a non-invariant load into an -; invariant load. +; invariant load. We can eliminate the second load when using +; MemorySSA since it tells us both loads are from the same heap state +; (LiveOnEntry). + +; FIXME: I think this test should be changed to check that the second load is eliminated. +; Waiting on Sanjoy/community input on !invariant.load semantics and LangRef clarifications. 
; CHECK-LABEL: @f_2( ; CHECK: %val0 = load i32, i32* %ptr ; CHECK: call void @clobber_and_use(i32 %val0) -; CHECK: %val1 = load i32, i32* %ptr, !invariant.load !0 -; CHECK: call void @clobber_and_use(i32 %val1) +; CHECK-NOMEMSSA: %val1 = load i32, i32* %ptr, !invariant.load !0 +; CHECK-NOMEMSSA-NEXT: call void @clobber_and_use(i32 %val1) +; CHECK-MEMSSA-NEXT: call void @clobber_and_use(i32 %val0) %val0 = load i32, i32* %ptr call void @clobber_and_use(i32 %val0) Index: test/Transforms/EarlyCSE/memoryssa.ll =================================================================== --- /dev/null +++ test/Transforms/EarlyCSE/memoryssa.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA +; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s +; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA +; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='early-cse-memssa' | FileCheck %s + +@G1 = global i32 zeroinitializer +@G2 = global i32 zeroinitializer + +;; Simple load value numbering across non-clobbering store. +; CHECK-LABEL: @test1( +; CHECK-NOMEMSSA-LABEL: @test1( +define i32 @test1() { + %V1 = load i32, i32* @G1 + store i32 0, i32* @G2 + %V2 = load i32, i32* @G1 + ; CHECK-NOMEMSSA: sub i32 %V1, %V2 + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + +;; Simple dead store elimination across non-clobbering store. 
+; CHECK-LABEL: @test2( +; CHECK-NOMEMSSA-LABEL: @test2( +define void @test2() { +entry: + %V1 = load i32, i32* @G1 + ; CHECK: store i32 0, i32* @G2 + store i32 0, i32* @G2 + ; CHECK-NOT: store + ; CHECK-NOMEMSSA: store i32 %V1, i32* @G1 + store i32 %V1, i32* @G1 + ret void +} Index: test/Transforms/EarlyCSE/read-reg.ll =================================================================== --- test/Transforms/EarlyCSE/read-reg.ll +++ test/Transforms/EarlyCSE/read-reg.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu"