Index: llvm/include/llvm/Analysis/MemorySSA.h
===================================================================
--- llvm/include/llvm/Analysis/MemorySSA.h
+++ llvm/include/llvm/Analysis/MemorySSA.h
@@ -200,6 +200,10 @@
     return this->DefsOnlyType::getReverseIterator();
   }
 
+  /// Used for debugging and tracking things about MemoryAccesses.
+  /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
+  inline unsigned getID() const;
+
 protected:
   friend class MemoryDef;
   friend class MemoryPhi;
@@ -211,10 +215,6 @@
   /// moved.
   void setBlock(BasicBlock *BB) { Block = BB; }
 
-  /// Used for debugging and tracking things about MemoryAccesses.
-  /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
-  inline unsigned getID() const;
-
   MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue,
                BasicBlock *BB, unsigned NumOperands)
       : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue),
Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -172,6 +172,7 @@
 
     AAResults *AA = nullptr;
     MemoryDependenceResults *MD = nullptr;
+    MemorySSA *MSSA = nullptr;
    DominatorTree *DT = nullptr;
 
    uint32_t nextValueNumber = 1;
@@ -181,6 +182,7 @@
                                 Value *LHS, Value *RHS);
     Expression createExtractvalueExpr(ExtractValueInst *EI);
     uint32_t lookupOrAddCall(CallInst *C);
+    uint32_t lookupOrAddLoadStore(Instruction *I);
     uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
                               uint32_t Num, GVN &Gvn);
     bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
@@ -209,6 +211,7 @@
     void setAliasAnalysis(AAResults *A) { AA = A; }
     AAResults *getAliasAnalysis() const { return AA; }
     void setMemDep(MemoryDependenceResults *M) { MD = M; }
+    void setMemorySSA(MemorySSA *M) { MSSA = M; }
     void setDomTree(DominatorTree *D) { DT = D; }
     uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
     void verifyRemoved(const Value *) const;
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -501,6 +501,36 @@
   }
 }
 
+// If MemorySSA is available, the incoming memory state is included in the hash
+// for loads and stores. If the incoming memory state is:
+// * LiveOnEntry, do nothing.
+// * a MemoryPhi, add <0, ID>, where ID is the MemoryPhi ID.
+// * a MemoryDef, add VN, where VN is the value number of the memory-setting
+//   instruction.
+uint32_t GVN::ValueTable::lookupOrAddLoadStore(Instruction *I) {
+  if (MSSA == nullptr) {
+    valueNumbering[I] = nextValueNumber;
+    return nextValueNumber++;
+  }
+
+  Expression e;
+  e.type = I->getType();
+  e.opcode = I->getOpcode();
+  for (Use &Op : I->operands())
+    e.varargs.push_back(lookupOrAdd(Op));
+
+  MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
+  if (isa<MemoryPhi>(MA)) {
+    e.varargs.push_back(0);
+    e.varargs.push_back(MA->getID());
+  } else if (!MSSA->isLiveOnEntryDef(MA))
+    e.varargs.push_back(lookupOrAdd(cast<MemoryDef>(MA)->getMemoryInst()));
+
+  uint32_t N = assignExpNewValueNum(e).first;
+  valueNumbering[I] = N;
+  return N;
+}
+
 /// Returns true if a value number exists for the specified value.
 bool GVN::ValueTable::exists(Value *V) const {
   return valueNumbering.count(V) != 0;
 }
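To make the hashing scheme above concrete, here is a small IR illustration (illustration only, not part of the patch or its test suite; @vn_example is a made-up function):

; %v0 hashes as {Load, i32, VN(%p)}: its incoming state is LiveOnEntry, which
; contributes nothing. %v1 and %v2 both hash as {Load, i32, VN(%p), VN(store)},
; because the store's MemoryDef is the clobbering access for both, so they
; receive the same value number while %v0 does not.
define i32 @vn_example(i32* %p) {
entry:
  %v0 = load i32, i32* %p
  store i32 1, i32* %p
  %v1 = load i32, i32* %p
  %v2 = load i32, i32* %p
  %r = add i32 %v1, %v2
  ret i32 %r
}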
@@ -571,6 +601,9 @@
     valueNumbering[V] = nextValueNumber;
     NumberingPhi[nextValueNumber] = cast<PHINode>(V);
     return nextValueNumber++;
+  case Instruction::Load:
+  case Instruction::Store:
+    return lookupOrAddLoadStore(I);
   default:
     valueNumbering[V] = nextValueNumber;
     return nextValueNumber++;
@@ -670,7 +703,7 @@
   auto *MemDep =
       isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
-  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
+  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
                          MSSA ? &MSSA->getMSSA() : nullptr);
@@ -2028,6 +2061,16 @@
   return false;
 }
 
+// For certain opcodes, some varargs are not value numbers. Those entries
+// should not be translated.
+static bool shouldNotPhiTranslate(const GVN::Expression &Exp, int Idx) {
+  return (Idx > 1 && Exp.opcode == Instruction::InsertValue) ||
+         (Idx > 0 && Exp.opcode == Instruction::ExtractValue) ||
+         (Idx > 1 && Exp.opcode == Instruction::ShuffleVector) ||
+         (Idx > 0 && Exp.opcode == Instruction::Load) ||
+         (Idx > 1 && Exp.opcode == Instruction::Store);
+}
+
 /// Translate value number \p Num using phis, so that it has the values of
 /// the phis in BB.
 uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
@@ -2053,12 +2096,7 @@
   Expression Exp = Expressions[ExprIdx[Num]];
 
   for (unsigned i = 0; i < Exp.varargs.size(); i++) {
-    // For InsertValue and ExtractValue, some varargs are index numbers
-    // instead of value numbers. Those index numbers should not be
-    // translated.
-    if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
-        (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
-        (i > 1 && Exp.opcode == Instruction::ShuffleVector))
+    if (shouldNotPhiTranslate(Exp, i))
       continue;
     Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
   }
@@ -2468,6 +2506,7 @@
   ICF = &ImplicitCFT;
   this->LI = LI;
   VN.setMemDep(MD);
+  VN.setMemorySSA(MSSA);
   ORE = RunORE;
   InvalidBlockRPONumbers = true;
   MemorySSAUpdater Updater(MSSA);
@@ -3062,6 +3101,24 @@
   return !isGuaranteedToTransferExecutionToSuccessor(&I);
 }
 
+// Return true iff the two instructions have the same volatility and the same
+// atomic ordering.
+static bool canMergeInstructions(const Instruction &A, const Instruction &B) {
+  if (const auto *LA = dyn_cast<LoadInst>(&A)) {
+    const auto *LB = cast<LoadInst>(&B);
+    return LA->isVolatile() == LB->isVolatile() &&
+           LA->getOrdering() == LB->getOrdering();
+  }
+
+  if (const auto *SA = dyn_cast<StoreInst>(&A)) {
+    const auto *SB = cast<StoreInst>(&B);
+    return SA->isVolatile() == SB->isVolatile() &&
+           SA->getOrdering() == SB->getOrdering();
+  }
+
+  return true;
+}
+
 void GVN::collectHoistCandidates(BasicBlock *BB) {
   bool HasHoistBarrier = false;
   for (Instruction &I : *BB) {
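This filter is needed because the hash above deliberately ignores volatility and atomic ordering, so mismatched accesses still value-number alike; the check is applied when pairing candidates in the next hunk. A hypothetical example, same shape as the hoist_volatile tests further down (illustration only, @no_pair_mixed_volatility is made up):

; %x and %y receive the same value number, but canMergeInstructions rejects
; the pair (isVolatile() differs), so neither load is hoisted into %entry.
define i32 @no_pair_mixed_volatility(i1 %cc, i32* %p) {
entry:
  br i1 %cc, label %if.then, label %if.else
if.then:
  %x = load volatile i32, i32* %p
  br label %if.end
if.else:
  %y = load i32, i32* %p
  br label %if.end
if.end:
  %r = phi i32 [ %x, %if.then ], [ %y, %if.else ]
  ret i32 %r
}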
@@ -3090,8 +3147,11 @@
     if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) {
       uint32_t N = VN.lookupOrAdd(&I);
       auto It = HoistPairs.find(N);
-      if (It != HoistPairs.end())
-        It->second.second = &I;
+      if (It != HoistPairs.end()) {
+        // Only pair instructions with the same volatility and atomic ordering.
+        if (canMergeInstructions(*It->second.first, I))
+          It->second.second = &I;
+      }
 
       if ((HasHoistBarrier |= isHoistBarrier(I)))
         break;
     }
@@ -3103,7 +3163,14 @@
 void GVN::replaceInstruction(Instruction *I, Instruction *Repl) {
   LLVM_DEBUG(dbgs() << "Simple GVNHoist: replacing" << *I << " by" << *Repl
                     << '\n';);
+  assert(llvm::none_of(InstrsToErase,
+                       [=](const Instruction *X) { return I == X; }) &&
+         "Deleting an instruction twice");
   patchReplacementInstruction(I, Repl);
+  if (auto *LI = dyn_cast<LoadInst>(Repl))
+    LI->setAlignment(std::min(LI->getAlign(), cast<LoadInst>(I)->getAlign()));
+  else if (auto *SI = dyn_cast<StoreInst>(Repl))
+    SI->setAlignment(std::min(SI->getAlign(), cast<StoreInst>(I)->getAlign()));
   ICF->removeUsersOf(I);
   I->replaceAllUsesWith(Repl);
   salvageKnowledge(I, AC);
@@ -3118,6 +3185,14 @@
   I->eraseFromParent();
 }
 
+// Returns `true` if the instruction `Def` may write to the same memory that
+// `Use` reads.
+static bool defClobbersUse(AAResults *AA, Instruction *Def, Instruction *Use) {
+  Optional<MemoryLocation> DefLoc = MemoryLocation::getOrNone(Def);
+  Optional<MemoryLocation> UseLoc = MemoryLocation::getOrNone(Use);
+  return DefLoc && UseLoc ? !AA->isNoAlias(*DefLoc, *UseLoc) : true;
+}
+
 // Only hoist instructions from the "then" block.
 // Each hoisted instruction must be paired with an instruction from the "else"
 // block.
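A hypothetical illustration of what defClobbersUse distinguishes; the noalias arguments are what let BasicAA prove the negative case (illustration only, @clobber_example is made up):

; For the load of %p: defClobbersUse(AA, store-to-%q, load) is false, since
; the noalias arguments let AA prove NoAlias; defClobbersUse(AA, store-to-%p,
; load) is true. When either instruction has no MemoryLocation (e.g. a call),
; the helper falls back to the conservative answer `true`.
define void @clobber_example(i32* noalias %p, i32* noalias %q) {
entry:
  %v = load i32, i32* %p
  store i32 0, i32* %q
  store i32 1, i32* %p
  ret void
}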
@@ -3142,8 +3217,6 @@
   It->second.second = nullptr;
   assert(ElseI->getParent() == ElseBB && "Instruction already removed");
 
-  assert(!ThenI->mayReadOrWriteMemory() && !ElseI->mayReadOrWriteMemory() &&
-         "Memory read/write instructions must not be hoisted.");
   bool Change = false;
   // Hoist operands. Begin by hoisting all of the operands of the "then"
@@ -3169,10 +3242,76 @@
     return {Change, true};
   }
 
+  // Do the same for memory dependencies.
+  if (ThenI->mayReadOrWriteMemory()) {
+    // Hoist preceding aliased MemoryDefs.
+    auto *MSSA = MSSAU->getMemorySSA();
+    auto *Walker = MSSA->getSkipSelfWalker();
+    MemoryAccess *ThenMA = Walker->getClobberingMemoryAccess(ThenI);
+    while (!MSSA->isLiveOnEntryDef(ThenMA) && ThenMA->getBlock() == ThenBB) {
+      bool DepChange, CantHoist;
+      std::tie(DepChange, CantHoist) =
+          hoistPair(DestBB, ThenBB, ElseBB,
+                    cast<MemoryDef>(ThenMA)->getMemoryInst(), nullptr);
+      Change |= DepChange;
+      if (CantHoist)
+        return {Change, true};
+      ThenMA = Walker->getClobberingMemoryAccess(ThenI);
+    }
+
+    MemoryAccess *ElseMA = Walker->getClobberingMemoryAccess(ElseI);
+    if (!MSSA->isLiveOnEntryDef(ElseMA) && ElseMA->getBlock() == ElseBB)
+      return {Change, true};
+
+    // For instructions that modify memory, try to hoist preceding aliased
+    // MemoryUses.
+    if (const auto *Def = dyn_cast<MemoryDef>(MSSA->getMemoryAccess(ThenI))) {
+      // Collect the uses ...
+      SmallVector<Instruction *> MemoryUses;
+      for (const MemoryAccess &MA : *MSSA->getBlockAccesses(ThenBB)) {
+        if (const auto *Use = dyn_cast<MemoryUse>(&MA)) {
+          if (!MSSA->locallyDominates(Use, Def))
+            continue;
+          Instruction *U = Use->getMemoryInst();
+          if (!defClobbersUse(getAliasAnalysis(), ThenI, U))
+            continue;
+          MemoryUses.push_back(U);
+        }
+      }
+
+      // ... then try to hoist them.
+      for (Instruction *I : MemoryUses) {
+        bool DepChange, CantHoist;
+        std::tie(DepChange, CantHoist) =
+            hoistPair(DestBB, ThenBB, ElseBB, I, nullptr);
+        Change |= DepChange;
+        if (CantHoist)
+          return {Change, true};
+      }
+
+      // None should have remained in the "else" block.
+      Def = cast<MemoryDef>(MSSA->getMemoryAccess(ElseI));
+      for (const MemoryAccess &MA : *MSSA->getBlockAccesses(ElseBB)) {
+        if (const auto *Use = dyn_cast<MemoryUse>(&MA)) {
+          if (!MSSA->locallyDominates(Use, Def))
+            continue;
+          Instruction *U = Use->getMemoryInst();
+          if (defClobbersUse(getAliasAnalysis(), ElseI, U))
+            return {Change, true};
+        }
+      }
+    }
+  }
+
+  // Remove the instruction from the memory dependence cache, as the cache
+  // might keep a reference to the old block.
+  if (MD)
+    MD->removeInstruction(ThenI);
+
   // Hoist one of the instructions and replace all uses of the other with it.
   ICF->removeInstruction(ThenI);
   ICF->insertInstructionTo(ThenI, DestBB);
   ThenI->moveBefore(DestBB->getTerminator());
+  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(ThenI))
+    MSSAU->moveToPlace(MA, DestBB,
+                       MemorySSA::InsertionPlace::BeforeTerminator);
   replaceInstruction(ElseI, ThenI);
 
   return {true, false};
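Taken together, the dependency handling above lets hoistPair pull an entire chain. A hypothetical case, same shape as the tests below (illustration only, @hoist_dep_chain is made up):

; Hoisting the pair (%x, %y) first triggers hoistPair on the pair of stores
; (found through getClobberingMemoryAccess); once the stores have moved, the
; loads' clobber is no longer inside %if.then/%if.else and the loads follow.
define i32 @hoist_dep_chain(i1 %cc, i32* %p) {
entry:
  br i1 %cc, label %if.then, label %if.else
if.then:
  store i32 7, i32* %p
  %x = load i32, i32* %p
  br label %if.end
if.else:
  store i32 7, i32* %p
  %y = load i32, i32* %p
  br label %if.end
if.end:
  %r = phi i32 [ %x, %if.then ], [ %y, %if.else ]
  ret i32 %r
}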
Index: llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
===================================================================
--- llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
+++ llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
@@ -35,9 +35,7 @@
 ; CHECK: getelementptr inbounds double, double addrspace(100)* {{%.*}}, i64 -1
 ; CHECK-NEXT: store double 0.000000e+00, double addrspace(100)* [[DST:%.*]]
 ; CHECK-NEXT: store double 0.000000e+00, double addrspace(100)* [[DST]]
-; CHECK: load
-; CHECK: getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)*
-; CHECK: load
+; CHECK: load double addrspace(100)*
 ; CHECK: getelementptr inbounds double, double addrspace(100)* {{%.*}}, i64 -1
 ; CHECK: store double 0.000000e+00, double addrspace(100)*
 ; CHECK: ret
Index: llvm/test/Transforms/GVN/PRE/load-pre-align.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/load-pre-align.ll
+++ llvm/test/Transforms/GVN/PRE/load-pre-align.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -S | FileCheck %s
+; RUN: opt < %s -gvn --enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" --enable-simple-gvn-hoist=false -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
Index: llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
+++ llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-load-pre -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" -enable-simple-gvn-hoist=false -enable-load-pre=false -S | FileCheck %s
 ; This testcase assumed we'll PRE the load into %for.cond, but we don't actually
 ; verify that doing so is safe.  If there didn't _happen_ to be a load in
 ; %for.end, we would actually be lengthening the execution on some paths, and
Index: llvm/test/Transforms/GVN/PRE/volatile.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/volatile.ll
+++ llvm/test/Transforms/GVN/PRE/volatile.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Tests that check our handling of volatile instructions encountered
 ; when scanning for dependencies
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-simple-gvn-hoist=false -S < %s | FileCheck %s
 
 ; Check that we can bypass a volatile load when searching
 ; for dependencies of a non-volatile load
Index: llvm/test/Transforms/GVN/condprop.ll
===================================================================
--- llvm/test/Transforms/GVN/condprop.ll
+++ llvm/test/Transforms/GVN/condprop.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn -enable-simple-gvn-hoist=false -S | FileCheck %s
 
 @a = external global i32		; <i32*> [#uses=7]
Index: llvm/test/Transforms/GVN/simple-gvnhoist-loadstore.ll
===================================================================
--- llvm/test/Transforms/GVN/simple-gvnhoist-loadstore.ll
+++ llvm/test/Transforms/GVN/simple-gvnhoist-loadstore.ll
@@ -4,19 +4,16 @@
 define dso_local i32 @basic_load_hoisting(i1 %cc, i32* %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @basic_load_hoisting(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[C:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[B]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[C]], [[TMP4]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -42,15 +39,14 @@
 define dso_local i32 @hoist_volatile(i1 %cc, i32* %a) {
 ; CHECK-LABEL: @hoist_volatile(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP1:%.*]] = load volatile i32, i32* [[A]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -102,15 +98,14 @@
 define dso_local i32 @hoist_atomic(i1 %cc, i32* %a) {
 ; CHECK-LABEL: @hoist_atomic(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load atomic i32, i32* [[A:%.*]] acquire, align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load atomic i32, i32* [[A:%.*]] acquire, align 4
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[A]] acquire, align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF_THEN]] ], [ [[TMP0]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -224,14 +219,13 @@
 define dso_local i32 @no_reorder_load_over_aliasing_store1(i1 %cc, i32* %a, i32* %b, i32* %c, i32 %d, i32 %e) {
 ; CHECK-LABEL: @no_reorder_load_over_aliasing_store1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 0, i32* [[B:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 0, i32* [[C:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    store i32 0, i32* [[B:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    store i32 0, i32* [[C:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4, !tbaa [[TBAA6:![0-9]+]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store i32 0, i32* [[B]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !tbaa [[TBAA6]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[D:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], [[E:%.*]]
@@ -264,19 +258,18 @@
 define dso_local i32 @reorder_non_volatile(i1 %cc, i32* %a, i32* %b, i32 %c) {
 ; CHECK-LABEL: @reorder_non_volatile(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load volatile i32, i32* [[B]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP4]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -378,19 +371,16 @@
 define dso_local i32 @no_reorder_atomic1(i1 %cc, i32* %a, i32* %b, i32 %c) {
 ; CHECK-LABEL: @no_reorder_atomic1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load atomic i32, i32* [[A:%.*]] acquire, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[B:%.*]] acquire, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP3:%.*]] = load atomic i32, i32* [[A]] acquire, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i32, i32* [[B]] acquire, align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -416,20 +406,18 @@
 define dso_local i32 @reorder_load_over_non_aliasing_store(i1 %cc, i32* %a, i32 %b, i32 %c, i32* %p) {
 ; CHECK-LABEL: @reorder_load_over_non_aliasing_store(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], [[B:%.*]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], [[B:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[C:%.*]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[B]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[C]], [[TMP4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i32 [[C]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -463,17 +451,15 @@
 ; CHECK:       if.else0:
 ; CHECK-NEXT:    br label [[BB]]
 ; CHECK:       bb:
-; CHECK-NEXT:    br i1 [[CC1:%.*]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]]
-; CHECK:       if.then1:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[CC1:%.*]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]]
+; CHECK:       if.then1:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else1:
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[TMP2]], [[B]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP3]], [[IF_ELSE1]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP1]], [[IF_ELSE1]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -507,13 +493,12 @@
 define dso_local i32 @host_store0(i1 %cc, i32* %a, i32 %b, i32* %p) {
 ; CHECK-LABEL: @host_store0(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[B:%.*]], 1
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[B]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END]]
@@ -543,15 +528,13 @@
 define dso_local i32 @host_store1(i1 %cc, i32* %a, i32 %b, i32* %p) {
 ; CHECK-LABEL: @host_store1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    store i32 1, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[B:%.*]], 1
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store i32 1, i32* [[A]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[B]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END]]
@@ -689,20 +672,19 @@
 define dso_local i32 @no_store_reorder_across_aliasing_load1(i1 %cc, i32* %a, i32* %b, i32* %p) {
 ; CHECK-LABEL: @no_store_reorder_across_aliasing_load1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[B:%.*]], align 4
 ; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP4]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -729,14 +711,12 @@
 define dso_local void @store_reorder_across_nonaliasing_store(i1 %cc, i32* %b, i32* %c) {
 ; CHECK-LABEL: @store_reorder_across_nonaliasing_store(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* [[C:%.*]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT:    store i32 0, i32* [[B:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 0, i32* [[B:%.*]], align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    store i32 1, i32* [[C:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store i32 1, i32* [[C]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    store i32 0, i32* [[B]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -761,13 +741,12 @@
 define dso_local i32 @store_reorder_across_nonaliasing_load(i1 %cc, i32* %b, i32* %c) {
 ; CHECK-LABEL: @store_reorder_across_nonaliasing_load(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* [[C:%.*]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4, !tbaa [[TBAA4]]
-; CHECK-NEXT:    store i32 1, i32* [[C:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store i32 1, i32* [[C]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
Index: llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
===================================================================
--- llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
+++ llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
@@ -74,18 +74,18 @@
   ret i32 %r
 }
 
-; speculation barrier
+; speculation barrier (it could have hoisted the `sdiv`s, but the search stops
define dso_local i32 @spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) { ; CHECK-LABEL: @spec_barrier1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = sdiv i32 [[C:%.*]], [[TMP0]] ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i32 [[C]], [[TMP0]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -200,7 +200,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[TMP4:%.*]] = load atomic volatile i32, i32* [[P]] acquire, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic volatile i32, i32* [[P]] monotonic, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -218,7 +218,7 @@ br label %if.end if.else: - %4 = load atomic volatile i32, i32* %p acquire, align 4 + %4 = load atomic volatile i32, i32* %p monotonic, align 4 %5 = sdiv i32 %a, %b %6 = add nsw i32 %c, %5 %7 = mul nsw i32 %4, %6 Index: llvm/test/Transforms/GVNHoist/hoist-inline.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-inline.ll +++ llvm/test/Transforms/GVNHoist/hoist-inline.ll @@ -3,8 +3,8 @@ ; Check that the inlined loads are hoisted. ; CHECK-LABEL: define i32 @fun( ; CHECK-LABEL: entry: +; CHECK-NOT: if.then ; CHECK: load i32, i32* @A -; CHECK: if.then: @A = external global i32 @B = external global i32 Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll =================================================================== --- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll +++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; RUN: opt -O2 -S < %s | FileCheck %s +; RUN: opt -O2 -S --enable-simple-gvn-hoist=false < %s | FileCheck %s ; CHECK-LABEL: main ; CHECK: if.end