Index: llvm/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/GVN.h +++ llvm/include/llvm/Transforms/Scalar/GVN.h @@ -163,13 +163,17 @@ // Value number to PHINode mapping. Used for phi-translate in scalarpre. DenseMap NumberingPhi; + // Value number to BasicBlock mapping. Used for phi-translate across + // MemoryPhis. + DenseMap NumberingBB; + // Cache for phi-translate in scalarpre. using PhiTranslateMap = DenseMap, uint32_t>; PhiTranslateMap PhiTranslateTable; AAResults *AA = nullptr; - MemoryDependenceResults *MD = nullptr; + MemorySSA *MSSA = nullptr; DominatorTree *DT = nullptr; uint32_t nextValueNumber = 1; @@ -178,7 +182,10 @@ Expression createCmpExpr(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS); Expression createExtractvalueExpr(ExtractValueInst *EI); + Expression createCallExpr(CallInst *C); + void addMemoryStateArg(Instruction *I, Expression &E); uint32_t lookupOrAddCall(CallInst *C); + uint32_t lookupOrAddLoadStore(Instruction *I); uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, uint32_t Num, GVNPass &Gvn); bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred, @@ -206,7 +213,7 @@ void erase(Value *v); void setAliasAnalysis(AAResults *A) { AA = A; } AAResults *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceResults *M) { MD = M; } + void setMemorySSA(MemorySSA *M) { MSSA = M; } void setDomTree(DominatorTree *D) { DT = D; } uint32_t getNextUnusedValueNumber() { return nextValueNumber; } void verifyRemoved(const Value *) const; Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -290,22 +290,15 @@ Expression e; e.type = I->getType(); e.opcode = I->getOpcode(); - if (const GCRelocateInst *GCR = dyn_cast(I)) { - // gc.relocate is 'special' call: its second and third operands are - // not real values, but indices into statepoint's argument list. - // Use the refered to values for purposes of identity. - e.varargs.push_back(lookupOrAdd(GCR->getOperand(0))); - e.varargs.push_back(lookupOrAdd(GCR->getBasePtr())); - e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr())); - } else { - for (Use &Op : I->operands()) - e.varargs.push_back(lookupOrAdd(Op)); - } + + for (Use &Op : I->operands()) + e.varargs.push_back(lookupOrAdd(Op)); + if (I->isCommutative()) { // Ensure that commutative instructions that only differ by a permutation - // of their operands get the same value number by sorting the operand value - // numbers. Since commutative operands are the 1st two operands it is more - // efficient to sort by hand rather than using, say, std::sort. + // of their operands get the same value number by sorting the operand + // value numbers. Since commutative operands are the 1st two operands it + // is more efficient to sort by hand rather than using, say, std::sort. assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); @@ -379,6 +372,37 @@ return e; } +GVNPass::Expression GVNPass::ValueTable::createCallExpr(CallInst *C) { + Expression E; + E.type = C->getType(); + E.opcode = C->getOpcode(); + if (const GCRelocateInst *GCR = dyn_cast(C)) { + // gc.relocate is 'special' call: its second and third operands are + // not real values, but indices into statepoint's argument list. + // Use the refered to values for purposes of identity. + E.varargs.push_back(lookupOrAdd(GCR->getOperand(0))); + E.varargs.push_back(lookupOrAdd(GCR->getBasePtr())); + E.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr())); + return E; + } + + for (Use &Op : C->operands()) + E.varargs.push_back(lookupOrAdd(Op)); + + if (C->isCommutative()) { + // There are commutative instrinsic calls. + assert(C->getNumOperands() >= 2 && "Unsupported commutative instruction!"); + if (E.varargs[0] > E.varargs[1]) + std::swap(E.varargs[0], E.varargs[1]); + E.commutative = true; + } + + if (!AA->doesNotAccessMemory(C)) + addMemoryStateArg(C, E); + + return E; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -397,107 +421,55 @@ NumberingPhi[num] = PN; } -uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { - if (AA->doesNotAccessMemory(C)) { - Expression exp = createExpr(C); - uint32_t e = assignExpNewValueNum(exp).first; - valueNumbering[C] = e; - return e; - } else if (MD && AA->onlyReadsMemory(C)) { - Expression exp = createExpr(C); - auto ValNum = assignExpNewValueNum(exp); - if (ValNum.second) { - valueNumbering[C] = ValNum.first; - return ValNum.first; - } +// Include the incoming memory state into the hash for the instruction. If the +// incoming memory state is: +// * LiveOnEntry, add the value number of the entry block +// * a MemoryPhi, add the value number of the basic block, +// corresponding to that MemoryPhi +// * a MemoryDef, add the value number of the memory setting +// instruction. +void GVNPass::ValueTable::addMemoryStateArg(Instruction *I, Expression &E) { + assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory"); + MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I); - MemDepResult local_dep = MD->getDependency(C); + uint32_t N = 0; + if (isa(MA)) + N = lookupOrAdd(MA->getBlock()); + else if (MSSA->isLiveOnEntryDef(MA)) + N = lookupOrAdd(&I->getFunction()->getEntryBlock()); + else + N = lookupOrAdd(cast(MA)->getMemoryInst()); - if (!local_dep.isDef() && !local_dep.isNonLocal()) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - - if (local_dep.isDef()) { - // For masked load/store intrinsics, the local_dep may actully be - // a normal load or store instruction. - CallInst *local_cdep = dyn_cast(local_dep.getInst()); - - if (!local_cdep || local_cdep->arg_size() != C->arg_size()) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - - for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { - uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); - uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); - if (c_vn != cd_vn) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - } - - uint32_t v = lookupOrAdd(local_cdep); - valueNumbering[C] = v; - return v; - } - - // Non-local case. - const MemoryDependenceResults::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(C); - // FIXME: Move the checking logic to MemDep! - CallInst* cdep = nullptr; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const NonLocalDepEntry *I = &deps[i]; - if (I->getResult().isNonLocal()) - continue; - - // We don't handle non-definitions. If we already have a call, reject - // instruction dependencies. - if (!I->getResult().isDef() || cdep != nullptr) { - cdep = nullptr; - break; - } - - CallInst *NonLocalDepCall = dyn_cast(I->getResult().getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ - cdep = NonLocalDepCall; - continue; - } - - cdep = nullptr; - break; - } - - if (!cdep) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - - if (cdep->arg_size() != C->arg_size()) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { - uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); - uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); - if (c_vn != cd_vn) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - } + E.varargs.push_back(N); +} - uint32_t v = lookupOrAdd(cdep); - valueNumbering[C] = v; - return v; - } else { +uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { + if (!AA->onlyReadsMemory(C)) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } + Expression E = createCallExpr(C); + uint32_t VN = assignExpNewValueNum(E).first; + valueNumbering[C] = VN; + return VN; +} + +uint32_t GVNPass::ValueTable::lookupOrAddLoadStore(Instruction *I) { + if (MSSA == nullptr) { + valueNumbering[I] = nextValueNumber; + return nextValueNumber++; + } + + Expression E; + E.type = I->getType(); + E.opcode = I->getOpcode(); + for (Use &Op : I->operands()) + E.varargs.push_back(lookupOrAdd(Op)); + addMemoryStateArg(I, E); + + uint32_t N = assignExpNewValueNum(E).first; + valueNumbering[I] = N; + return N; } /// Returns true if a value number exists for the specified value. @@ -514,6 +486,8 @@ if (!isa(V)) { valueNumbering[V] = nextValueNumber; + if (auto *BB = dyn_cast(V)) + NumberingBB[nextValueNumber] = BB; return nextValueNumber++; } @@ -572,6 +546,9 @@ valueNumbering[V] = nextValueNumber; NumberingPhi[nextValueNumber] = cast(V); return nextValueNumber++; + case Instruction::Load: + case Instruction::Store: + return lookupOrAddLoadStore(I); default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; @@ -609,6 +586,7 @@ valueNumbering.clear(); expressionNumbering.clear(); NumberingPhi.clear(); + NumberingBB.clear(); PhiTranslateTable.clear(); nextValueNumber = 1; Expressions.clear(); @@ -623,6 +601,8 @@ // If V is PHINode, V <--> value number is an one-to-one mapping. if (isa(V)) NumberingPhi.erase(Num); + else if (isa(V)) + NumberingBB.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -671,7 +651,7 @@ auto *MemDep = isMemDepEnabled() ? &AM.getResult(F) : nullptr; auto *LI = AM.getCachedResult(F); - auto *MSSA = AM.getCachedResult(F); + auto *MSSA = &AM.getResult(F); auto &ORE = AM.getResult(F); bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, MSSA ? &MSSA->getMSSA() : nullptr); @@ -1992,56 +1972,43 @@ return NewNum; } -// Return true if the value number \p Num and NewNum have equal value. -// Return false if the result is unknown. -bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, - const BasicBlock *Pred, - const BasicBlock *PhiBlock, - GVNPass &Gvn) { - CallInst *Call = nullptr; - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals) { - Call = dyn_cast(Vals->Val); - if (Call && Call->getParent() == PhiBlock) - break; - Vals = Vals->Next; - } - - if (AA->doesNotAccessMemory(Call)) - return true; - - if (!MD || !AA->onlyReadsMemory(Call)) - return false; - - MemDepResult local_dep = MD->getDependency(Call); - if (!local_dep.isNonLocal()) - return false; - - const MemoryDependenceResults::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(Call); - - // Check to see if the Call has no function local clobber. - for (const NonLocalDepEntry &D : deps) { - if (D.getResult().isNonFuncLocal()) - return true; - } - return false; -} - /// Translate value number \p Num using phis, so that it has the values of /// the phis in BB. uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred, const BasicBlock *PhiBlock, uint32_t Num, GVNPass &Gvn) { if (PHINode *PN = NumberingPhi[Num]) { + if (PN->getParent() != PhiBlock) + return Num; for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (PN->getIncomingBlock(i) == Pred) if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) return TransVal; } return Num; } + if (BasicBlock *BB = NumberingBB[Num]) { + // Value numbers of basic blocks are used to represent memory state in + // load/store instructions and read-only function calls when said state is + // set by a MemoryPhi. + if (BB != PhiBlock) + return Num; + MemoryPhi *MPhi = MSSA->getMemoryAccess(BB); + for(unsigned I = 0, N = MPhi->getNumIncomingValues(); I != N; ++I) { + if (MPhi->getIncomingBlock(I) != Pred) + continue; + MemoryAccess *MA = MPhi->getIncomingValue(I); + if (auto *PredPhi = dyn_cast(MA)) + return lookupOrAdd(PredPhi->getBlock()); + if (MSSA->isLiveOnEntryDef(MA)) + return lookupOrAdd(&BB->getParent()->getEntryBlock()); + return lookupOrAdd(cast(MA)->getMemoryInst()); + } + llvm_unreachable( + "CFG/MemorySSA mismatch: predecessor not found among incoming blocks"); + } + // If there is any value related with Num is defined in a BB other than // PhiBlock, it cannot depend on a phi in PhiBlock without going through // a backedge. We can do an early exit in that case to save compile time. @@ -2075,11 +2042,9 @@ } } - if (uint32_t NewNum = expressionNumbering[Exp]) { - if (Exp.opcode == Instruction::Call && NewNum != Num) - return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num; + if (uint32_t NewNum = expressionNumbering[Exp]) return NewNum; - } + return Num; } @@ -2468,7 +2433,7 @@ ImplicitControlFlowTracking ImplicitCFT; ICF = &ImplicitCFT; this->LI = LI; - VN.setMemDep(MD); + VN.setMemorySSA(MSSA); ORE = RunORE; InvalidBlockRPONumbers = true; MemorySSAUpdater Updater(MSSA); @@ -3067,8 +3032,6 @@ return false; auto *LIWP = getAnalysisIfAvailable(); - - auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), @@ -3079,7 +3042,7 @@ : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, &getAnalysis().getORE(), - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + &getAnalysis().getMSSA()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3087,6 +3050,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); if (Impl.isMemDepEnabled()) AU.addRequired(); AU.addRequired(); @@ -3106,6 +3070,7 @@ INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -554,7 +554,6 @@ NumFuncArgs = F.arg_size(); VN.setDomTree(DT); VN.setAliasAnalysis(AA); - VN.setMemDep(MD); bool Res = false; // Perform DFS Numbering of instructions. unsigned BBI = 0; Index: llvm/test/Analysis/ValueTracking/gep-negative-issue.ll =================================================================== --- llvm/test/Analysis/ValueTracking/gep-negative-issue.ll +++ llvm/test/Analysis/ValueTracking/gep-negative-issue.ll @@ -5,16 +5,16 @@ %ArrayImpl = type { i64, i64 addrspace(100)*, [1 x i64], [1 x i64], [1 x i64], i64, i64, double addrspace(100)*, double addrspace(100)*, i8, i64 } %_array = type { i64, %ArrayImpl addrspace(100)*, i8 } -define void @test(i64 %n_chpl) { +define void @test(%_array* %p) { entry: ; First section is some code - %0 = getelementptr inbounds %_array, %_array* null, i32 0, i32 1 + %0 = getelementptr inbounds %_array, %_array* %p, i32 0, i32 1 %1 = load %ArrayImpl addrspace(100)*, %ArrayImpl addrspace(100)** %0 %2 = getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)* %1, i32 0, i32 8 %3 = load double addrspace(100)*, double addrspace(100)* addrspace(100)* %2 %4 = getelementptr inbounds double, double addrspace(100)* %3, i64 -1 ; Second section is that code repeated - %x0 = getelementptr inbounds %_array, %_array* null, i32 0, i32 1 + %x0 = getelementptr inbounds %_array, %_array* %p, i32 0, i32 1 %x1 = load %ArrayImpl addrspace(100)*, %ArrayImpl addrspace(100)** %x0 %x2 = getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)* %x1, i32 0, i32 8 %x3 = load double addrspace(100)*, double addrspace(100)* addrspace(100)* %x2 Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -1026,8 +1026,9 @@ ; GCN-O3-NEXT: Speculatively execute instructions ; GCN-O3-NEXT: Scalar Evolution Analysis ; GCN-O3-NEXT: Straight line strength reduction -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering @@ -1064,9 +1065,10 @@ ; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O3-NEXT: Expand reduction intrinsics ; GCN-O3-NEXT: Natural Loop Information -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: Lazy Branch Probability Analysis ; GCN-O3-NEXT: Lazy Block Frequency Analysis Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -495,8 +495,9 @@ ; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: MergedLoadStoreMotion -; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Memory SSA +; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Memory Dependence Analysis ; GCN-O2-NEXT: Lazy Branch Probability Analysis ; GCN-O2-NEXT: Lazy Block Frequency Analysis @@ -859,8 +860,9 @@ ; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: MergedLoadStoreMotion -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: Lazy Branch Probability Analysis ; GCN-O3-NEXT: Lazy Block Frequency Analysis